diff --git a/README.md b/README.md index 83768723ce..cbae8ef077 100644 --- a/README.md +++ b/README.md @@ -51,10 +51,6 @@ This table provides the current status of the migration of specific ROCm systems | `rocminfo` | 8/11 | | `rocr-runtime` | 8/11 | | `rocm-core` | 8/12 | -| `clr` | 8/13 | -| `hip` | 8/13 | -| `hipother` | 8/13 | -| `hip-tests` | 8/13 | *Remaining schedule to be determined. diff --git a/projects/rocprofiler-compute/.azuredevops/rocm-ci-mainline.yml b/projects/rocprofiler-compute/.azuredevops/rocm-ci-mainline.yml new file mode 100644 index 0000000000..042e985b8a --- /dev/null +++ b/projects/rocprofiler-compute/.azuredevops/rocm-ci-mainline.yml @@ -0,0 +1,65 @@ +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + +variables: +- group: common +- template: /.azuredevops/variables-global.yml@pipelines_repo + +trigger: + batch: true + branches: + include: + - amd-mainline + paths: + exclude: + - .github + - docs + - '.*.y*ml' + - '*.md' + - AUTHORS + - LICENSE + - VERSION + +pr: + autoCancel: true + branches: + include: + - amd-mainline + paths: + exclude: + - .github + - docs + - '.*.y*ml' + - '*.md' + - AUTHORS + - LICENSE + - VERSION + drafts: false + +# For changes to mainline, only build & test against mainline ROCm +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-compute.yml@pipelines_repo + parameters: + jobMatrix: + buildJobs: + - gfx942-mainline: + name: gfx942_mainline + target: gfx942 + dependencySource: mainline + - gfx90a-mainline: + name: gfx90a_mainline + target: gfx90a + dependencySource: mainline + testJobs: + - gfx942-mainline: + name: gfx942_mainline + target: gfx942 + dependencySource: mainline + - gfx90a-mainline: + name: gfx90a_mainline + target: gfx90a + dependencySource: mainline diff --git a/projects/rocprofiler-compute/.azuredevops/rocm-ci.yml b/projects/rocprofiler-compute/.azuredevops/rocm-ci.yml new file mode 100644 index 0000000000..26e4d6095a --- 
/dev/null +++ b/projects/rocprofiler-compute/.azuredevops/rocm-ci.yml @@ -0,0 +1,47 @@ +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + +variables: +- group: common +- template: /.azuredevops/variables-global.yml@pipelines_repo + +trigger: + batch: true + branches: + include: + - develop + - amd-staging + paths: + exclude: + - .github + - docs + - '.*.y*ml' + - '*.md' + - AUTHORS + - LICENSE + - VERSION + +pr: + autoCancel: true + branches: + include: + - develop + - amd-staging + paths: + exclude: + - .github + - docs + - '.*.y*ml' + - '*.md' + - AUTHORS + - LICENSE + - VERSION + drafts: false + +# For changes to develop and staging, build & test against both staging and mainline ROCm +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-compute.yml@pipelines_repo diff --git a/projects/rocprofiler-compute/.cmake-format.yaml b/projects/rocprofiler-compute/.cmake-format.yaml new file mode 100644 index 0000000000..2e00dc211f --- /dev/null +++ b/projects/rocprofiler-compute/.cmake-format.yaml @@ -0,0 +1,65 @@ +parse: + additional_commands: {} + override_spec: {} + vartags: [] + proptags: [] +format: + disable: false + line_width: 90 + tab_size: 4 + use_tabchars: false + fractional_tab_policy: use-space + max_subgroups_hwrap: 2 + max_pargs_hwrap: 6 + max_rows_cmdline: 2 + separate_ctrl_name_with_space: false + separate_fn_name_with_space: false + dangle_parens: false + dangle_align: child + min_prefix_chars: 4 + max_prefix_chars: 10 + max_lines_hwrap: 2 + line_ending: unix + command_case: lower + keyword_case: upper + always_wrap: [] + enable_sort: true + autosort: false + require_valid_layout: false + layout_passes: {} +markup: + bullet_char: '-' + enum_char: '*' + first_comment_is_literal: true + literal_comment_pattern: ^# + fence_pattern: ^\s*([`~]{3}[`~]*)(.*)$ + ruler_pattern: ^\s*[^\w\s]{3}.*[^\w\s]{3}$ + explicit_trailing_pattern: '#<' + hashruler_min_length: 10 + 
canonicalize_hashrulers: true + enable_markup: true +lint: + disabled_codes: [] + function_pattern: '[0-9a-z_]+' + macro_pattern: '[0-9A-Z_]+' + global_var_pattern: '[A-Z][0-9A-Z_]+' + internal_var_pattern: _[A-Z][0-9A-Z_]+ + local_var_pattern: '[a-z][a-z0-9_]+' + private_var_pattern: _[0-9a-z_]+ + public_var_pattern: '[A-Z][0-9A-Z_]+' + argument_var_pattern: '[a-z][a-z0-9_]+' + keyword_pattern: '[A-Z][0-9A-Z_]+' + max_conditionals_custom_parser: 2 + min_statement_spacing: 1 + max_statement_spacing: 2 + max_returns: 6 + max_branches: 12 + max_arguments: 5 + max_localvars: 15 + max_statements: 50 +encode: + emit_byteorder_mark: false + input_encoding: utf-8 + output_encoding: utf-8 +misc: + per_command: {} diff --git a/projects/rocprofiler-compute/.github/CODEOWNERS b/projects/rocprofiler-compute/.github/CODEOWNERS new file mode 100644 index 0000000000..71826a2dd6 --- /dev/null +++ b/projects/rocprofiler-compute/.github/CODEOWNERS @@ -0,0 +1,13 @@ +* @coleramos425 @feizheng10 @vedithal-amd @xuchen-amd @cfallows-amd @ywang103-amd @jamessiddeley-amd + +# Documentation files +docs/ @ROCm/rocm-documentation @prbasyal-amd +*.md @ROCm/rocm-documentation @prbasyal-amd +*.rst @ROCm/rocm-documentation @prbasyal-amd +.readthedocs.yaml @ROCm/rocm-documentation @prbasyal-amd + +## Packaging +# cmake/ +# tests/ +# CMakeLists.txt +# utils/ diff --git a/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/bug_report.yaml b/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/bug_report.yaml new file mode 100644 index 0000000000..bc6fcb8159 --- /dev/null +++ b/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -0,0 +1,131 @@ +name: Bug Report +description: Report a bug you've encountered for further investigation +title: "[Bug]: " +labels: ["bug", "triage"] +assignees: + - njobypet +body: + - type: markdown + attributes: + value: | + Please complete the following form. 
+ + - type: textarea + id: bug-description + attributes: + label: Describe the bug + description: A clear and concise description of what the bug is. + placeholder: e.g. I found the following error when trying to... + validations: + required: true + + - type: markdown + attributes: + value: | + ## Development Environment + + - type: input + id: linux-distro + attributes: + label: Linux Distribution + description: | + What operating system are you using? Hint: + ```shell + echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)" + ``` + placeholder: e.g. Ubuntu 22.04 + validations: + required: true + + - type: input + id: rocprofiler-compute-version + attributes: + label: ROCm Compute Profiler Version + description: | + What version of ROCm Compute Profiler are you using? Hint: + ```shell + rocprof-compute --version + ``` + placeholder: e.g. 2.1.0 + validations: + required: true + + - type: input + id: gpu + attributes: + label: GPU + description: | + What GPU(s) did you encounter the issue on? Hint: + ```shell + echo "GPU:" && /opt/rocm/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)" + ``` + placeholder: e.g. AMD MI250, AMD MI300X + validations: + required: true + + - type: input + id: rocm-version + attributes: + label: ROCm Version + description: | + What version(s) of ROCm did you encounter the issue on? Deduce from: + ```shell + readlink -f $(which rocprof) + ``` + placeholder: e.g. ROCm 6.0.2 + + - type: input + id: cluster + attributes: + label: Cluster name (if applicable) + description: What is the name of the cluster you are using? + placeholder: e.g. Frontier, El Capitan, etc. + + - type: markdown + attributes: + value: | + ## To Reproduce + + - type: textarea + id: reproducer + attributes: + label: Reproducer + description: Steps to reproduce the behavior + placeholder: | + 1. Run '...' + 2. Go to '...' + 3. Click on '....' + 4. 
See error + validations: + required: true + + - type: textarea + id: expected-behav + attributes: + label: Expected behavior + description: A clear and concise description of what you expected to happen. + placeholder: e.g. I expected the following to happen... + + - type: markdown + attributes: + value: | + ## Other + + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste rocprofiler-compute's `log.txt` file. This will be automatically formatted into code, so no need for backticks. + render: shell + + - type: textarea + id: screenshots + attributes: + label: Screenshots + description: If applicable, add screenshots to help explain your problem. + + - type: textarea + id: context + attributes: + label: Additional Context + description: Add any other context about the problem here. diff --git a/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/feature_request.yaml b/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/feature_request.yaml new file mode 100644 index 0000000000..fa9b03b6bd --- /dev/null +++ b/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -0,0 +1,45 @@ +name: Feature Request +description: Suggest an idea for this project +title: "[Req]: " +labels: ["enhancement", "triage"] +assignees: + - njobypet +body: + - type: markdown + attributes: + value: | + Please complete the following form. + + - type: textarea + id: problem + attributes: + label: Is your feature request related to a problem? + description: A clear and concise description of what the problem is. + placeholder: e.g. I'm always frustrated when... + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Describe the solution you'd like + description: A clear and concise description of what you want to happen. + placeholder: e.g. I propose that... 
+ validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Describe any alternatives you've considered + description: Walk through your thought process and how you arrived at your solution. + placeholder: | + e.g. Some alternative approaches might be: + 1. ... + 2. ... + + - type: textarea + id: context + attributes: + label: Additional context + description: Add any other context or screenshots about the feature request here. diff --git a/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/question.yaml b/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/question.yaml new file mode 100644 index 0000000000..b96702aee4 --- /dev/null +++ b/projects/rocprofiler-compute/.github/ISSUE_TEMPLATE/question.yaml @@ -0,0 +1,23 @@ +name: Question +description: Clarifying questions and uncertainties +labels: ["question"] +body: + - type: markdown + attributes: + value: | + Please complete the following form. + + - type: textarea + id: question + attributes: + label: Describe your question + description: A clear and concise description of your question and how it came up. + placeholder: I was unsure how to ... + validations: + required: true + + - type: textarea + id: context + attributes: + label: Additional context + description: Add any other context or screenshots about the question here. diff --git a/projects/rocprofiler-compute/.github/dependabot.yml b/projects/rocprofiler-compute/.github/dependabot.yml new file mode 100644 index 0000000000..a169714fbe --- /dev/null +++ b/projects/rocprofiler-compute/.github/dependabot.yml @@ -0,0 +1,21 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/docs/sphinx" # Location of package manifests + open-pull-requests-limit: 10 + schedule: + interval: "daily" + target-branch: "develop" + labels: + - "documentation" + - "dependencies" + reviewers: + - "samjwu" + - "feizheng10" + - "coleramos425" + - "vedithal-amd" diff --git a/projects/rocprofiler-compute/.github/pull_request_template.md b/projects/rocprofiler-compute/.github/pull_request_template.md new file mode 100644 index 0000000000..0ab3dfc10b --- /dev/null +++ b/projects/rocprofiler-compute/.github/pull_request_template.md @@ -0,0 +1,34 @@ +# rocprofiler-compute Pull Request + +## Related Issue + +- [ ] Closes # + +## What type of PR is this? (check all that apply) + +- [ ] Bug Fix +- [ ] Cherry Pick +- [ ] Continuous Integration +- [ ] Documentation Update +- [ ] Feature +- [ ] Optimization +- [ ] Refactor +- [ ] Other (please specify) + +## Technical Details + + +## Have you added or updated tests to validate functionality? + +- [ ] Yes +- [ ] No - does not apply to this PR + +## Added / Updated documentation? + +- [ ] Yes +- [ ] No - does not apply to this PR + +## Have you updated CHANGELOG? 
+ +- [ ] Yes +- [ ] No - does not apply to this PR diff --git a/projects/rocprofiler-compute/.github/workflows/daily-staging-sync.yml b/projects/rocprofiler-compute/.github/workflows/daily-staging-sync.yml new file mode 100644 index 0000000000..708428f32c --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/daily-staging-sync.yml @@ -0,0 +1,31 @@ +name: Sync Staging with Develop +on: + workflow_dispatch: + schedule: + - cron: 0 0 * * * + +jobs: + promote-dev-to-stg: + if: github.repository == 'ROCm/rocprofiler-compute' + runs-on: ubuntu-latest + name: Promote Develop to Staging + steps: + - name: Generate a token + id: generate-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_APP_ID }} + private-key: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_PRIVATE_KEY }} + + - name: Checkout + uses: actions/checkout@v4 + with: + ref: develop + fetch-depth: '0' + token: ${{ steps.generate-token.outputs.token }} + + - name: Merge - Fast Forward Only + run: | + git checkout amd-staging + git merge origin/develop --ff-only + git push origin HEAD diff --git a/projects/rocprofiler-compute/.github/workflows/docs.yml b/projects/rocprofiler-compute/.github/workflows/docs.yml new file mode 100644 index 0000000000..11cfff5839 --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/docs.yml @@ -0,0 +1,62 @@ +name: Documentation + +on: + push: + branches: [ amd-mainline ] + paths: + - 'docs/archive/docs-2.x/**' + - 'docs/archive/docs-1.x/**' + - '.github/workflows/docs.yml' + + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + # Build job + build: + runs-on: ubuntu-latest + container: + image: sphinxdoc/sphinx + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Additional python packages + run: pip3 install -r docs/archive/requirements-doc.txt + - name: Setup Pages + uses: 
actions/configure-pages@v4 + - name: Build 1.x docs + run: | + cd docs/archive/docs-1.x + make html + - name: Build 2.x docs + run: | + cd docs/archive/docs-2.x + make html + - name: Relocate 1.x docs + run: | + mv docs/archive/docs-1.x/_build/html docs/archive/_build/html/1.x + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: ./docs/archive/_build/html + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/projects/rocprofiler-compute/.github/workflows/formatting.yml b/projects/rocprofiler-compute/.github/workflows/formatting.yml new file mode 100644 index 0000000000..0f7b924288 --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/formatting.yml @@ -0,0 +1,75 @@ + +name: Formatting + +on: + push: + branches: [ amd-mainline, amd-staging, release/**, develop ] + pull_request: + branches: [ amd-mainline, amd-staging, release/**, develop ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + python: + runs-on: ubuntu-22.04 + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python '3.x' + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install black isort + if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi + - name: Run black formatter + uses: psf/black@stable + with: + use_pyproject: true + - name: Run isort formatter + uses: isort/isort-action@master + + cmake: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v4 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y python3-pip + python3 -m pip install cmake-format + - name: cmake-format + run: | + set +e + 
cmake-format -i $(find . -type f | egrep 'CMakeLists.txt|\.cmake$') + if [ $(git diff | wc -l) -gt 0 ]; then + echo -e "\nError! CMake code not formatted. Run cmake-format...\n" + echo -e "\nFiles:\n" + git diff --name-only + echo -e "\nFull diff:\n" + git diff + exit 1 + fi + + python-bytecode: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v4 + - name: find-bytecode + run: | + set +e + FILES=$(find . -type f | egrep '__pycache__|\.pyc$') + if [ -n "${FILES}" ]; then + echo -e "\nError! Python bytecode included in commit\n" + echo -e "### FILES: ###" + echo -e "${FILES}" + echo -e "##############" + exit 1 + fi diff --git a/projects/rocprofiler-compute/.github/workflows/mi-rhel9.yml b/projects/rocprofiler-compute/.github/workflows/mi-rhel9.yml new file mode 100644 index 0000000000..9dfb4efaff --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/mi-rhel9.yml @@ -0,0 +1,116 @@ +name: mi-rhel9 + +on: + push: + branches: [ amd-mainline, release/** ] + + # Allows manual execution + workflow_dispatch: + +permissions: + contents: read + checks: write + pull-requests: write + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + profile: + strategy: + matrix: + version: [5.7.1, 6.0.2] + hardware: [mi100, mi200] + profiler: [default, rocprofv2] + exclude: + - profiler: rocprofv2 + hardware: mi100 + fail-fast: false + runs-on: [mi100, rhel9] + + env: + PYTHONPATH: /home1/ciuser/rocprofiler-compute_deps + CI_VISIBLE_DEVICES: 1 + name: ROCm v${{ matrix.version }} / ${{ matrix.hardware }} / ${{ matrix.profiler }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Python Path + run: echo ${PYTHONPATH} + - name: Setup hardware-specific run details + run: | + if [ ${{ matrix.hardware }} == "mi100" ];then + echo "CI_QUEUE=ci" >> $GITHUB_ENV + echo "CI_ARCH=gfx908" >> $GITHUB_ENV + elif [ ${{ matrix.hardware }} == "mi200" ];then + echo "CI_QUEUE=mi2104x" >> $GITHUB_ENV + echo 
"CI_ARCH=gfx90a" >> $GITHUB_ENV + else + echo "Unsupported hardware" + exit 1 + fi + - name: Setup profiling mode + run: | + if [ ${{ matrix.profiler }} == "rocprofv2" ];then + echo "ROCPROF=rocprofv2" >> $GITHUB_ENV + fi + - name: Install Python collateral (build and test) + run: | + pip3 install -t ${PYTHONPATH} -r requirements.txt + pip3 install -t ${PYTHONPATH} -r requirements-test.txt + - name: Load ROCm ${{ matrix.version}} + run: | + module load cmake + module load rocm/${{ matrix.version }} + echo $PATH > $GITHUB_PATH + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "ROCM_PATH=$ROCM_PATH" >> "$GITHUB_ENV" + - name: Check Environment + run: | + echo "PATH=$PATH" + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + echo "ROCM_PATH=$ROCM_PATH" + echo "CI_QUEUE=$CI_QUEUE" + echo "CI_ARCH=$CI_ARCH" + echo "CI_VISIBLE_DEVICES=$CI_VISIBLE_DEVICES" + echo "ROCPROF=$ROCPROF" + - name: Configure + run: | + mkdir build + cd build + ml cmake + cmake -DENABLE_TESTS=ON -DCMAKE_HIP_ARCHITECTURES=$CI_ARCH -DENABLE_COVERAGE=ON -DPYTEST_NUMPROCS=8 .. 
+ - name: Build tests and Run [profile] mode + run: | + cd build + make + srun -N 1 -J rocprof-compute -p $CI_QUEUE -t 00:20:00 ctest -j 4 --resource-spec-file ../tests/4gpus.json --verbose -L profile + - name: Run [analyze workloads] mode + if: '!cancelled()' + run: | + cd build + srun -N 1 -J rocprof-compute -p $CI_QUEUE -t 00:10:00 ctest --verbose -R test_analyze_workloads + - name: Run [analyze commands] mode + if: '!cancelled()' + run: | + cd build + srun -N 1 -J rocprof-compute -p $CI_QUEUE -t 00:10:00 ctest --verbose -R test_analyze_commands + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action/linux@v2 + if: always() + with: + files: | + tests/**/test_*.xml + - name: Summarize code coverage + if: always() + run: coverage report + - name: Upload code coverage + uses: zgosalvez/github-actions-report-lcov@v4 + if: always() + with: + coverage-files: tests/coverage.info + minimum-coverage: 35 + artifact-name: code-coverage-report-rocm${{ matrix.version }}-${{ matrix.hardware }}-${{ matrix.profiler }} + github-token: ${{ secrets.GITHUB_TOKEN }} + update-comment: true diff --git a/projects/rocprofiler-compute/.github/workflows/packaging.yml b/projects/rocprofiler-compute/.github/workflows/packaging.yml new file mode 100644 index 0000000000..ba30d9af83 --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/packaging.yml @@ -0,0 +1,64 @@ +name: packaging + +on: + push: + tags: + - "v[1-9].[0-9]+.[0-9]+*" + - "rocm-[0-9]+.[0-9]+.[0-9]+*" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + distbuild: + runs-on: ubuntu-latest + name: Create release distribution + env: + INSTALL_DIR: /tmp + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Verify VERSION file consistent with tag + run: utils/ver_check.py --tag ${{github.ref_name}} + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: '3.8' + - name: Python dependency installs + run: 
python3 -m pip install -t${INSTALL_DIR}/python-libs -r requirements.txt + - name: Configure + run: | + mkdir build + cd build + cmake -DPYTHON_DEPS=${INSTALL_DIR}/python-libs .. + - name: Release tarball + run: | + cd build + make package_source + - name: Rename tarball + run: mv build/rocprofiler-compute-*.tar.gz build/rocprofiler-compute-${{github.ref_name}}.tar.gz + - name: Archive tarball + uses: actions/upload-artifact@v4 + with: + name: rocprofiler-compute-${{github.ref_name}}.tar.gz + path: build/rocprofiler-compute-${{github.ref_name}}.tar.gz + - name: Set version + run: echo "VERSION=$(cat VERSION)" >> $GITHUB_ENV + - name: Determine release name + run: | + if [[ ${{github.ref_name}} == rocm-* ]]; then + echo "RELEASE_NAME=rocprofiler-compute ${{ env.VERSION }} for ${{github.ref_name}}" + else + echo "RELEASE_NAME=rocprofiler-compute ${{ env.VERSION }}" + fi >> $GITHUB_ENV + - name: Upload tarball Release Asset + uses: softprops/action-gh-release@v2 + if: startsWith(github.ref, 'refs/tags/') && github.repository == 'ROCm/rocprofiler-compute' + with: + fail_on_unmatched: True + generate_release_notes: True + draft: False # toggle for debugging + files: | + build/rocprofiler-compute-${{github.ref_name}}.tar.gz + name: ${{ env.RELEASE_NAME }} diff --git a/projects/rocprofiler-compute/.github/workflows/rhel-8.yml b/projects/rocprofiler-compute/.github/workflows/rhel-8.yml new file mode 100644 index 0000000000..0181d49216 --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/rhel-8.yml @@ -0,0 +1,71 @@ +# This is a basic workflow to help you get started with Actions + +name: RHEL 8/9 + +# Controls when the workflow will run +on: + push: + branches: [ amd-mainline, amd-staging, release/**, develop ] + paths-ignore: + - '*.md' + - '.github/**/*.md' + - 'docs/**' + - 'docker/**' + pull_request: + branches: [ amd-mainline, amd-staging, release/**, develop ] + paths-ignore: + - '*.md' + - '.github/**/*.md' + - 'docs/**' + - 'docker/**' + + # Allows you 
to run this workflow manually from the Actions tab + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + container: + image: dgaliffiamd/rocprofiler-systems:ci-base-rhel-${{ matrix.os-release }} + strategy: + fail-fast: false + matrix: + os-release: [ '8.10', '9.3'] + build-type: ['Release'] + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + - name: Install baseline OS dependencies + run: | + yum clean all + yum makecache + yum -y install git + yum -y install python39 + yum -y install cmake3 + yum -y install which + yum -y install glibc-langpack-en + - name: Checkout + uses: actions/checkout@v4 + - name: Install Python prereqs + run: | + python3.9 -m pip install -r requirements.txt + python3.9 -m pip install -r requirements-test.txt + - name: Configure and install + run: | + mkdir build + cd build + cmake -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-compute -DPYTEST_NUMPROCS=4 .. 
+ make install + - name: CTest- Analyze Commands + run: | + cd build + ctest --verbose -R test_analyze_commands + - name: CTest- Analyze Workloads + run: | + cd build + ctest --verbose -R test_analyze_workloads diff --git a/projects/rocprofiler-compute/.github/workflows/tarball.yml b/projects/rocprofiler-compute/.github/workflows/tarball.yml new file mode 100644 index 0000000000..d7db8f5bbd --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/tarball.yml @@ -0,0 +1,112 @@ +name: tarball + +on: + push: + branches: [ amd-mainline, release/** ] + pull_request: + paths-ignore: + - '*.md' + - '.github/**/*.md' + - 'docs/**' + - 'docker/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + distbuild: + runs-on: ubuntu-latest + name: Create distribution tarball + env: + INSTALL_DIR: /tmp/foo1 + steps: + - name: Set git sha mode + id: sha-mode + run: | + if [ "$EVENT" == 'pull_request' ]; then + echo "sha=${{github.event.pull_request.head.sha}}" >> $GITHUB_OUTPUT + else + echo "sha=$GITHUB_SHA" >> $GITHUB_OUTPUT + fi + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ steps.sha-mode.sha }} + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: '3.8' + - name: Python dependency installs + run: python3 -m pip install -t${INSTALL_DIR}/python-libs -r requirements.txt + - name: Configure + run: | + mkdir build + cd build + cmake -DPYTHON_DEPS=${INSTALL_DIR}/python-libs .. 
+ - name: Release tarball + run: | + cd build + make package_source + - name: Archive tarball + uses: actions/upload-artifact@v4 + with: + name: tarball-testing + path: build/rocprofiler-compute-*.tar.gz + retention-days: 3 + disttest: + runs-on: ubuntu-latest + needs: [distbuild] + name: Tarball tests + env: + INSTALL_DIR: /tmp/foo2 + steps: + - name: Access tarball + uses: actions/download-artifact@v4 + with: + name: tarball-testing + - name: Expand + run: tar xfz rocprofiler-compute-*.tar.gz; rm rocprofiler-compute-*.tar.gz + - name: Python dependency installs + run: | + cd rocprofiler-compute-* + python3 -m pip install -t${INSTALL_DIR}/python-libs -r requirements.txt + - name: Configure + run: | + cd rocprofiler-compute-* + mkdir build + cd build + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/rocprofiler-compute \ + -DPYTHON_DEPS=${INSTALL_DIR}/python-libs .. + - name: Install + run: | + cd rocprofiler-compute-* + cd build + make install + - name: Verify expected paths + run: | + # find $INSTALL_DIR + test -d $INSTALL_DIR/rocprofiler-compute + test -x $INSTALL_DIR/rocprofiler-compute/bin/rocprof-compute + test -s $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/VERSION + test -s $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/VERSION.sha + test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/rocprof_compute_analyze + test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/rocprof_compute_profile + test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/rocprof_compute_soc + test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/utils + test -s $INSTALL_DIR/rocprofiler-compute/share/rocprofiler-compute/sample/vcopy.cpp + test -d $INSTALL_DIR/rocprofiler-compute/share/rocprofiler-compute/modulefiles + test -s $INSTALL_DIR/rocprofiler-compute/share/doc/rocprofiler-compute/LICENSE + - name: Query version (setting PYTHONPATH by hand) + run: | + export 
PYTHONPATH=${INSTALL_DIR}/python-libs:$PYTHONPATH + $INSTALL_DIR/rocprofiler-compute/bin/rocprof-compute --version + - name: Install Lmod + run: sudo apt-get install -y lmod + - name: Access rocprofiler-compute using modulefile + run: | + . /etc/profile.d/lmod.sh + module use $INSTALL_DIR/rocprofiler-compute/share/rocprofiler-compute/modulefiles + module load rocprofiler-compute + module list + rocprof-compute --version diff --git a/projects/rocprofiler-compute/.github/workflows/ubuntu-jammy.yml b/projects/rocprofiler-compute/.github/workflows/ubuntu-jammy.yml new file mode 100644 index 0000000000..61e1a95fb1 --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/ubuntu-jammy.yml @@ -0,0 +1,61 @@ +# This is a basic workflow to help you get started with Actions + +name: Ubuntu 22.04 + +on: + push: + branches: [ amd-mainline, amd-staging, release/**, develop ] + paths-ignore: + - '*.md' + - '.github/**/*.md' + - 'docs/**' + - 'docker/**' + pull_request: + branches: [ amd-mainline, amd-staging, release/**, develop ] + paths-ignore: + - '*.md' + - '.github/**/*.md' + - 'docs/**' + - 'docker/**' + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + container: + image: dgaliffiamd/rocprofiler-systems:ci-base-ubuntu-22.04 + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + - name: Install baseline OS dependencies + run: | + apt-get update + apt-get install -y git + apt-get install -y python3-pip + apt-get install -y cmake + - name: Checkout + uses: actions/checkout@v4 + - name: Install Python prereqs + run: | + python3 -m pip install -r requirements.txt + python3 -m pip install -r requirements-test.txt + - name: Configure and install + run: | + mkdir build + cd build + cmake 
-DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-compute -DPYTEST_NUMPROCS=4 .. + make install + - name: CTest- Analyze Commands + run: | + cd build + ctest --verbose -R test_analyze_commands + - name: CTest- Analyze Workloads + run: | + cd build + ctest --verbose -R test_analyze_workloads diff --git a/projects/rocprofiler-compute/.github/workflows/weekly-liangdin-test-rebase.yml b/projects/rocprofiler-compute/.github/workflows/weekly-liangdin-test-rebase.yml new file mode 100644 index 0000000000..86b9872b60 --- /dev/null +++ b/projects/rocprofiler-compute/.github/workflows/weekly-liangdin-test-rebase.yml @@ -0,0 +1,31 @@ +name: Rebase liangdin-test on top of amd-mainline +on: + workflow_dispatch: + schedule: + - cron: 0 0 * * 1 + +jobs: + promote-dev-to-stg: + if: github.repository == 'ROCm/rocprofiler-compute' + runs-on: ubuntu-latest + name: Rebase liangdin-test on top of amd-mainline + steps: + - name: Generate a token + id: generate-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_APP_ID }} + private-key: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_PRIVATE_KEY }} + + - name: Checkout + uses: actions/checkout@v4 + with: + ref: develop + fetch-depth: '0' + token: ${{ steps.generate-token.outputs.token }} + + - name: Rebase + run: | + git checkout liangdin-test + git rebase origin/amd-mainline + git push origin HEAD diff --git a/projects/rocprofiler-compute/.gitignore b/projects/rocprofiler-compute/.gitignore new file mode 100644 index 0000000000..38a7fd3d88 --- /dev/null +++ b/projects/rocprofiler-compute/.gitignore @@ -0,0 +1,25 @@ +# mongodb_connector files +__pycache__ + +# edit files +*~ + +# generated files/folders +/dist +/omniperf.spec +/build* +/.vscode +/.cache +/.venv +/workloads +.coverage +saved_analysis +pmc_kernel_top.csv +VERSION.sha + +# temp files +/tests/Testing + +# documentation artifacts +/_build +_toc.yml diff --git a/projects/rocprofiler-compute/.pre-commit-config.yaml
b/projects/rocprofiler-compute/.pre-commit-config.yaml new file mode 100644 index 0000000000..c7c3c76c59 --- /dev/null +++ b/projects/rocprofiler-compute/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +default_stages: [pre-commit] +fail_fast: true +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + # Python import sorting + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + # Python formatting (Using this mirror lets us use mypyc-compiled black, which is about 2x faster) + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 25.1.0 + hooks: + - id: black diff --git a/projects/rocprofiler-compute/.readthedocs.yaml b/projects/rocprofiler-compute/.readthedocs.yaml new file mode 100644 index 0000000000..add922e784 --- /dev/null +++ b/projects/rocprofiler-compute/.readthedocs.yaml @@ -0,0 +1,16 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +sphinx: + configuration: docs/conf.py + +build: + os: ubuntu-22.04 + tools: + python: "3.10" + +python: + install: + - requirements: docs/sphinx/requirements.txt diff --git a/projects/rocprofiler-compute/.zenodo.json b/projects/rocprofiler-compute/.zenodo.json new file mode 100644 index 0000000000..37f2c86693 --- /dev/null +++ b/projects/rocprofiler-compute/.zenodo.json @@ -0,0 +1,24 @@ +{ + "creators": [ + { + "affiliation": "AMD", + "name": "Xiaomin Lu" + }, + { + "affiliation": "AMD Research", + "name": "Cole Ramos" + }, + { + "affiliation": "AMD", + "name": "Fei Zheng" + }, + { + "affiliation": "AMD Research", + "name": "Karl W. 
Schulz" + }, + { + "affiliation": "AMD Research", + "name": "Jose Santos" + } + ] +} diff --git a/projects/rocprofiler-compute/AUTHORS b/projects/rocprofiler-compute/AUTHORS new file mode 100644 index 0000000000..48bc2a9ece --- /dev/null +++ b/projects/rocprofiler-compute/AUTHORS @@ -0,0 +1,12 @@ +# This is the list of ROCm Compute Profiler's significant contributors. +# +# This does not necessarily list everyone who has contributed code, +# especially since many employees of one corporation may be contributing. +# To see the full list of contributors, see the revision history in +# source control. +Xiaomin Lu +Cole Ramos +Karl Schultz +Fei Zheng +Nicholas Curtis +Jose Santos diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md new file mode 100644 index 0000000000..45303bdc3b --- /dev/null +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -0,0 +1,289 @@ +# Changelog for ROCm Compute Profiler + +Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/). + +## Unreleased + +### Added + +* Support Roofline plot on CLI (single run) + +* Stochastic (hardware-based) PC sampling has been enabled for AMD Instinct MI300X series and later accelerators. + +* Sorting of PC sampling by type: offset or count. + +* Add rocprof-compute Text User Interface (TUI) support for analyze mode (beta version) + * A command line based user interface to support interactive single-run analysis + * launch with `--tui` option in analyze mode. 
i.e., `rocprof-compute analyze --tui` + +* Add support to be able to acquire from rocprofv3 every single channel on each XCD of TCC counters + +* Add Docker files to package the application and dependencies into a single portable and executable standalone binary file + +* Analysis report based filtering + * -b option in profile mode now additionally accepts metric id(s) for analysis report based filtering + * -b option in profile mode also accepts hardware IP block for filtering, however, this support will be deprecated soon + * --list-metrics option added in profile mode to list possible metric id(s), similar to analyze mode + +* Data type selection option for roofline profiling + * --roofline-data-type / -R option added to specify which data types the user wants to capture in the roofline PDF plot outputs + * Default is FP32, but user can specify as many types as desired to overlay on the same plot output + +* Additional data types for roofline profiling + * Now supports FP4, FP6, FP8, FP16, BF16, FP32, FP64, I8, I32, I64 (dependent on gpu architecture) + +* Support host-trap PC Sampling on CLI (beta version) + +* Support for AMD Instinct MI350 series GPUs with the addition of the following counters: + * VALU co-issue (Two VALUs are issued instructions) efficiency + * Stream Processor Instruction (SPI) Wave Occupancy + * Scheduler-Pipe Wave Utilization + * Scheduler FIFO Full Rate + * CPC ADC Utilization + * F6F4 data type metrics + * Update formula for total FLOPs while taking into account F6F4 ops + * LDS STORE, LDS LOAD, LDS ATOMIC instruction count metrics + * LDS STORE, LDS LOAD, LDS ATOMIC bandwidth metrics + * LDS FIFO full rate + * Sequencer -> TA ADDR Stall rates + * Sequencer -> TA CMD Stall rates + * Sequencer -> TA DATA Stall rates + * L1 latencies + * L2 latencies + * L2 to EA stalls + * L2 to EA stalls per channel + +* Roofline support for RHEL 10 + +* Roofline support for MI350 series architecture + +* Interface to rocprofiler-sdk + * Setting
ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script + * Add --rocprofiler-sdk-library-path runtime option to choose the path to rocprofiler-sdk library to be used + * Using rocprof v1 / v2 / v3 interfaces will trigger a deprecation warning to use rocprofiler-sdk interface + +* Support MEM chart on CLI (single run) + +* Add deprecation warning for database update mode. + +### Changed + +* Change the default rocprof version to rocprofv3, this is used when environment variable "ROCPROF" is not set +* Change the rocprof version for unit tests to rocprofv3 on all SoCs except MI100 +* Change normal_unit default to per_kernel +* Change dependency from rocm-smi to amd-smi +* Decrease profiling time by not collecting counters not used in post analysis +* Update definition of following metrics for MI 350: + * VGPR Writes + * Total FLOPs (consider fp6 and fp4 ops) +* Update Dash to >=3.0.0 (for web UI) +* Change when Roofline PDFs are generated- during general profiling and --roof-only profiling (skip only when --no-roof option is present) +* Update Roofline binaries + * Rebuild using latest ROCm stack + * OS distribution support minimum for roofline feature is now Ubuntu22.04, RHEL9, and SLES15SP6 + +### Optimized + +* ROCm Compute Profiler CLI has been improved to better display the GPU architecture analytics + +### Resolved issues + +* Fixed MI 100 counters not being collected when rocprofv3 is used +* Fixed option specs-correction +* Fixed kernel name and kernel dispatch filtering when using rocprof v3 +* Fixed not collecting TCC channel counters in rocprof v3 +* Fixed peak FLOPS of F8 I8 F16 and BF16 on MI300 + +### Known issues + +* On MI 100, accumulation counters will not be collected and the following metrics will not show up in analysis: Instruction Fetch Latency, Wavefront Occupancy, LDS Latency + * As a workaround, use ROCPROF=rocprof environment variable, to use rocprofv1 for profiling on MI 100 +
+* GPU id filtering is not supported when using rocprof v3 + +* Analysis of previously collected workload data will not work due to sysinfo.csv schema change + * As a workaround, run the profiling operation again for the workload and interrupt the process after ten seconds. + Followed by copying the `sysinfo.csv` file from the new data folder to the old one. + This assumes your system specification hasn't changed since the creation of the previous workload data. + +* Analysis of new workloads might require providing shader/memory clock speed using +--specs-correction operation if `amd-smi` or `rocminfo` does not provide clock speeds. + +* Memory chart on CLI might look corrupted if CLI width is too narrow + +### Removed + +* Roofline support for Ubuntu 20.04 and SLES below 15.6 +* Usage of rocm-smi + +## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0 + +### Added + +* Roofline support for Ubuntu 24.04 +* Experimental support rocprofv3 (not enabled as default) + +### Resolved issues + +* Fixed PoP of VALU Active Threads +* Workaround broken mclk for old version of rocm-smi + +## ROCm Compute Profiler 3.0.0 for ROCm 6.3.0 + +### Changed + +* Renamed Omniperf to ROCm Compute Profiler (#475) + +## Omniperf 2.0.1 for ROCm 6.2.1 + +### Changed + +* enable rocprofv1 for MI300 hardware (#391) +* refactoring and updating documentation (#362, #394, #398, #414, #420) +* branch renaming and workflow updates (#389, #404, #409) +* bug fix for analysis output +* add dependency checks on application launch (#393) +* patch for profiling multi-process/multi-GPU applications (#376, #396) +* packaging updates (#386) +* rename CHANGES to CHANGELOG.md (#410) +* rollback Grafana version in Dockerfile for Angular plugin compatibility (#416) +* enable CI triggers for Azure CI (#426) +* add GPU model distinction for MI300 systems (#423) +* new MAINTAINERS.md guide for omniperf publishing procedures (#402) + +### Optimized + +* reduced running time of Omniperf when profiling (#384) +* console
logging improvements + +## Omniperf 2.0.1 for ROCm 6.2.0 + +### Added + + * new option to force hardware target via `OMNIPERF_ARCH_OVERRIDE` global (#370) + * CI/CD support for MI300 hardware (#373) + * support for MI308X hardware (#375) + +### Optimized + + * cmake build improvements (#374) + +## Omniperf 2.0.0 (17 May 2024) + + * improved logging that spans all modes (#177) (#317) (#335) (#341) + * overhauled CI/CD that spans all modes (#179) + * extensible SoC classes to better support adding new hardware configs (#180) + * --kernel-verbose no longer overwrites kernel names (#193) + * general cleanup and improved organization of source code (#200) (#210) + * separate requirement files for docs and testing dependencies (#205) (#262) (#358) + * add support for MI300 hardware (#231) + * upgrade Grafana assets and build script to latest release (#235) + * update minimum ROCm and Python requirements (#277) + * sort rocprofiler input files prior to profiling (#304) + * new --quiet option will suppress verbose output and show a progress bar (#308) + * roofline support for Ubuntu 22.04 (#319) + +## Omniperf 1.1.0-PR1 (13 Oct 2023) + + * standardize headers to use 'avg' instead of 'mean' + * add color code thresholds to standalone gui to match grafana + * modify kernel name shortener to use cpp_filt (#168) + * enable stochastic kernel dispatch selection (#183) + * patch grafana plugin module to address a known issue in the latest version (#186) + * enhanced communication between analyze mode kernel flags (#187) + +## Omniperf 1.0.10 (22 Aug 2023) + + * critical patch for detection of llvm in rocm installs on SLURM systems + +## Omniperf 1.0.9 (17 Aug 2023) + + * add units to L2 per-channel panel (#133) + * new quickstart guide for Grafana setup in docs (#135) + * more detail on kernel and dispatch filtering in docs (#136, #137) + * patch manual join utility for ROCm >5.2.x (#139) + * add % of peak values to low level speed-of-light panels (#140) + * patch critical bug in
Grafana by removing a deprecated plugin (#141) + * enhancements to KernelName demangler (#142) + * general metric updates and enhancements (#144, #155, #159) + * add min/max/avg breakdown to instruction mix panel (#154) + +## Omniperf 1.0.8 (30 May 2023) + + * add `--kernel-names` option to toggle kernelName overlay in standalone roofline plot (#93) + * remove unused python modules (#96) + * fix empirical roofline calculation for single dispatch workloads (#97) + * match color of arithmetic intensity points to corresponding bw lines + + * ux improvements in standalone GUI (#101) + * enhanced readability for filtering dropdowns in standalone GUI (#102) + * new logfile to capture rocprofiler output (#106) + * roofline support for sles15 sp4 and future service packs (#109) + * adding dockerfiles for all supported Linux distros + * new examples for `--roof-only` and `--kernel` options added to documentation + + * enable cli analysis in Windows (#110) + * optional random port number in standalone GUI (#111) + * limit length of visible kernelName in `--kernel-names` option (#115) + * adjust metric definitions (#117, #130) + * manually merge rocprof runs, overriding default rocprofiler implementation (#125) + * fixed compatibility issues with Python 3.11 (#131) + +## Omniperf 1.0.8-PR2 (17 Apr 2023) + + * ux improvements in standalone GUI (#101) + * enhanced readability for filtering dropdowns in standalone GUI (#102) + * new logfile to capture rocprofiler output (#106) + * roofline support for sles15 sp4 and future service packs (#109) + * adding dockerfiles for all supported Linux distros + * new examples for `--roof-only` and `--kernel` options added to documentation + +## Omniperf 1.0.8-PR1 (13 Mar 2023) + + * add `--kernel-names` option to toggle kernelName overlay in standalone roofline plot (#93) + * remove unused python modules (#96) + * fix empirical roofline calculation for single dispatch workloads (#97) + * match color of arithmetic intensity points to
corresponding bw lines + +## Omniperf 1.0.7 (21 Feb 2023) + + * update documentation (#52, #64) + * improved detection of invalid command line arguments (#58, #76) + * enhancements to standalone roofline (#61) + * enable Omniperf on systems with X-server (#62) + * raise minimum version requirement for rocm (#64) + * enable baseline comparison in CLI analysis (#65) + * add multi-normalization to new metrics (#68, #81) + * support alternative profilers (#70) + * add MI100 configs to override rocprofiler's incomplete default (#75) + * improve error message when no GPU(s) detected (#85) + * separate CI tests by Linux distro and add status badges + +## Omniperf 1.0.6 (21 Dec 2022) + + * CI update: documentation now published via github action (#22) + * better error detection for incomplete ROCm installs (#56) + +## Omniperf 1.0.5 (13 Dec 2022) + + * store application command-line parameters in profiling output (#27) + * enable additional normalizations in CLI mode (#30) + * add missing ubuntu 20.04 roofline binary to packaging (#34) + * update L1 bandwidth metric calculations (#36) + * add L1 <-> L2 bandwidth calculation (#37) + * documentation updates (#38, #41) + * enhanced subprocess logging to identify critical errors in rocprofiler (#50) + * maintain git sha in production installs from tarball (#53) + +## Omniperf 1.0.4 (11 Nov 2022) + + * update python requirements.txt with minimum versions for numpy and pandas + * addition of progress bar indicator in web-based GUI (#8) + * reduced default content for web-based GUI to reduce load times (#9) + * minor packaging and CI updates + * variety of documentation updates + * added an optional argument to vcopy.cpp workload example to specify device id + +## Omniperf 1.0.3 (07 Nov 2022) + + * initial Omniperf release diff --git a/projects/rocprofiler-compute/CMakeLists.txt b/projects/rocprofiler-compute/CMakeLists.txt new file mode 100644 index 0000000000..1287691670 --- /dev/null +++ 
b/projects/rocprofiler-compute/CMakeLists.txt @@ -0,0 +1,618 @@ +cmake_minimum_required(VERSION 3.19 FATAL_ERROR) + +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL + CMAKE_SOURCE_DIR) + set(MSG "") + message(STATUS "Warning! Building from the source directory is not recommended") + message(STATUS "If unintended, please remove 'CMakeCache.txt' and 'CMakeFiles'") + message(STATUS "and build from a separate directory") + message(FATAL_ERROR "In-source build") +endif() + +# System info +cmake_host_system_information(RESULT LOCALHOST QUERY FQDN) +message(STATUS "Hostname: ${LOCALHOST}") + +# Versioning info derived from file +file(READ "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" FULL_VERSION_STRING LIMIT_COUNT 1) +string(REGEX REPLACE "(\n|\r)" "" FULL_VERSION_STRING "${FULL_VERSION_STRING}") +set(ROCPROFCOMPUTE_FULL_VERSION "${FULL_VERSION_STRING}") +string(REGEX REPLACE "([0-9]+)\.([0-9]+)\.([0-9]+)(.*)" "\\1.\\2.\\3" + ROCPROFCOMPUTE_VERSION "${FULL_VERSION_STRING}") + +# string(REGEX REPLACE "(${ROCPROFCOMPUTE_VERSION})(.*)" "\\2" +# ROCPROFCOMPUTE_VERSION_TWEAK +# "${FULL_VERSION_STRING}") +# string(REGEX REPLACE "^\\." 
"" ROCPROFCOMPUTE_VERSION_TWEAK +# "${ROCPROFCOMPUTE_VERSION_TWEAK}") + +project( + rocprofiler-compute + VERSION ${ROCPROFCOMPUTE_VERSION} + LANGUAGES C + DESCRIPTION + "A kernel-level profiling tool for machine learning/HPC workloads running on AMD MI GPUs" + HOMEPAGE_URL "https://github.com/ROCm/rocprofiler-compute") + +set(PACKAGE_NAME "rocprofiler-compute") +set(PACKAGE_NAME_UNDERSCORE "rocprofiler_compute") +set(EXECUTABLE_NAME "rocprof-compute") + +include(ExternalProject) +include(GNUInstallDirs) + +# version control info +find_package(Git) +if(Git_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") + execute_process( + COMMAND git log --pretty=format:%h -n 1 + OUTPUT_VARIABLE ROCPROFCOMPUTE_GIT_REV + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Git revision: ${ROCPROFCOMPUTE_GIT_REV}") + set(GIT_CLONE TRUE) +else() + set(GIT_CLONE FALSER) +endif() + +set(CMAKE_BUILD_TYPE "Release") +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX + "/opt/rocm" + CACHE PATH "default install path" FORCE) +endif() +message(STATUS "Installation path: ${CMAKE_INSTALL_PREFIX}") + +option(CHECK_PYTHON_DEPS "Verify necessary python dependencies" ON) +if(CHECK_PYTHON_DEPS) + # Python 3 is required + message(STATUS "Detecting Python interpreter...") + find_package( + Python3 3.8 + COMPONENTS Interpreter + REQUIRED) + + # Allow user-provided python search path + if(DEFINED PYTHON_DEPS) + set(ENV{PYTHONPATH} "${PYTHON_DEPS}") + message(STATUS "Optional PYTHON_DEPS provided:") + list(APPEND CMAKE_MESSAGE_INDENT " ") + message(STATUS "including ${PYTHON_DEPS} in search path") + list(POP_BACK CMAKE_MESSAGE_INDENT) + endif() + + # Check required Python packages + file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" pythonDeps) + + message(STATUS "Checking for required Python package dependencies...") + set_property(GLOBAL PROPERTY pythonDepsFlag "groovy") + + function(checkPythonPackage [package]) + # mapping for non-default package names + set(PACKAGE 
${ARGV0}) + if(${ARGV0} STREQUAL "pyyaml") + set(PACKAGE "yaml") + endif() + # Skip check for textual-fspicker; the caller below passes names with '-' already replaced by '_', so accept both spellings + if("${ARGV0}" MATCHES "^textual[-_]fspicker$") + message(STATUS "Skipping check for textual-fspicker") + return() + endif() + execute_process( + COMMAND ${Python3_EXECUTABLE} -c "import ${PACKAGE}" + OUTPUT_QUIET ERROR_QUIET + RESULT_VARIABLE EXIT_CODE) + if(${EXIT_CODE} EQUAL 0) + message(STATUS "${ARGV0} = yes") + else() + message(STATUS "${ARGV0} = missing") + set_property(GLOBAL PROPERTY pythonDepsFlag "missing") + endif() + endfunction() + + list(APPEND CMAKE_MESSAGE_INDENT " ") + foreach(package IN LISTS pythonDeps) + # Filter out any version requirements from requirements.txt + string(REGEX REPLACE "[><=].*" "" package "${package}") + string(REPLACE "-" "_" package "${package}") + checkpythonpackage(${package}) + endforeach() + list(POP_BACK CMAKE_MESSAGE_INDENT) + + get_property(pythonDepsInstalled GLOBAL PROPERTY pythonDepsFlag) + if(${pythonDepsInstalled} STREQUAL "groovy") + message(STATUS "OK: Python dependencies available in current environment.") + else() + message( + FATAL_ERROR + "\nNecessary Python package dependencies not found. Please install required dependencies " + "above using your favorite package manager. If using pip, consider running:\n" + "python3 -m pip install -r requirements.txt\n" + "at the top-level of this repository.
If preparing a shared installation for " + "multiple users, consider adding the -t option to install necessary dependencies " + "into a shared directory, e.g.\n" + "python3 -m pip install -t -r requirements.txt\n" + "Note that the -DPYTHON_DEPS= can be used to provide an " + "additional search path to cmake for python packages.") + endif() +endif() + +# ---------------------- +# modulefile creation +# ---------------------- + +set(MOD_INSTALL_PATH + "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/modulefiles/${PROJECT_NAME}" + CACHE STRING "Install path for modulefile") +message(STATUS "Modulefile install path: ${MOD_INSTALL_PATH}") + +set(moduleFileTemplate "rocprofcompute.lua.in") + +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/${moduleFileTemplate} + ${PROJECT_BINARY_DIR}/${MOD_INSTALL_PATH}/${ROCPROFCOMPUTE_FULL_VERSION}.lua @ONLY) + +# Thera mods +if(LOCALHOST MATCHES "TheraS01|.*\.thera\.amd\.com|thera-hn") + list(APPEND CMAKE_MESSAGE_INDENT " ") + message(STATUS "Using thera-specific modulefile modification") + file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.thera.mod mod_additions) + file(APPEND + ${PROJECT_BINARY_DIR}/${MOD_INSTALL_PATH}/${ROCPROFCOMPUTE_FULL_VERSION}.lua + ${mod_additions}) + list(POP_BACK CMAKE_MESSAGE_INDENT) +endif() + +# git versioning file +if(${GIT_CLONE}) + configure_file(${PROJECT_SOURCE_DIR}/cmake/VERSION.sha.in + ${PROJECT_SOURCE_DIR}/VERSION.sha @ONLY) +endif() + +# Setup testing collateral + +option(ENABLE_TESTS "Enable compilation of testing collateral" OFF) +set(CMAKE_HIP_FLAGS_RELEASE "-O2") +if(${ENABLE_TESTS}) + enable_language("C" "HIP") + add_subdirectory(tests) + +endif() +message(STATUS "Enable tests compilation: ${ENABLE_TESTS}") + +enable_testing() + +option(ENABLE_COVERAGE "Enable code coverage" OFF) +set(COV_OPTION "") +if(${ENABLE_COVERAGE}) + set(COV_OPTION "--cov=src" "--cov-append" "--cov-report=term-missing" + "--cov-report=lcov:tests/coverage.info") + # "--cov-report=term-missing" 
"--cov-report=xml:tests/coverage.xml") +endif() +message(STATUS "Code coverage: ${ENABLE_COVERAGE}") + +# CPU threads available for testing +set(PYTEST_NUMPROCS + "1" + CACHE STRING "Number of parallel threads to use with CPU-oriented tests") +message(STATUS "Pytest CPU threadcount: ${PYTEST_NUMPROCS}") + +# CPU threads available for the analyze-command tests (test_analyze_commands); defaults to 4 +set(PYTEST_NUMPROCS_ANALYSIS + "4" + CACHE STRING "Number of parallel threads to use with CPU-oriented tests") +message(STATUS "Pytest CPU threadcount: ${PYTEST_NUMPROCS_ANALYSIS}") + +# --------------------------- +# profile mode tests +# --------------------------- + +add_test( + NAME test_profile_kernel_execution + COMMAND + ${Python3_EXECUTABLE} -m pytest -m kernel_execution + --junitxml=tests/test_profile_kernel_execution.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_profile_ipblocks + COMMAND + ${Python3_EXECUTABLE} -m pytest -m block --junitxml=tests/test_profile_blocks.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +set_property(TEST test_profile_ipblocks PROPERTY COST 11) + +add_test( + NAME test_profile_dispatch + COMMAND + ${Python3_EXECUTABLE} -m pytest -m dispatch + --junitxml=tests/test_profile_dispatch.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +set_property(TEST test_profile_dispatch PROPERTY COST 5) + +add_test( + NAME test_profile_mem + COMMAND ${Python3_EXECUTABLE} -m pytest -m mem --junitxml=tests/test_profile_mem.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_profile_join + COMMAND ${Python3_EXECUTABLE} -m pytest -m join --junitxml=tests/test_profile_join.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY
${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_profile_sort + COMMAND ${Python3_EXECUTABLE} -m pytest -m sort --junitxml=tests/test_profile_sort.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_profile_misc + COMMAND ${Python3_EXECUTABLE} -m pytest -m misc --junitxml=tests/test_profile_misc.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_profile_section + COMMAND + ${Python3_EXECUTABLE} -m pytest -m section + --junitxml=tests/test_profile_section.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +set_tests_properties( + test_profile_kernel_execution + test_profile_ipblocks + test_profile_dispatch + test_profile_mem + test_profile_join + test_profile_sort + test_profile_misc + PROPERTIES LABELS "profile" RESOURCE_GROUPS gpus:1) + +# --------------------------- +# analysis command tests +# --------------------------- + +add_test( + NAME test_analyze_commands + COMMAND + ${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS_ANALYSIS} --verbose + --junitxml=tests/test_analyze_commands.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_analyze_commands.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------------------------- +# analyze workloads tests +# --------------------------- + +add_test( + NAME test_analyze_workloads + COMMAND + ${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS} + --junitxml=tests/test_analyze_workloads.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_analyze_workloads.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------------------------- +# TCP counter tests +# --------------------------- + +add_test( + NAME test_L1_cache_counters + COMMAND + ${Python3_EXECUTABLE} -m pytest -m L1_cache + --junitxml=tests/test_L1_cache_counters.xml ${COV_OPTION} + 
${PROJECT_SOURCE_DIR}/tests/test_TCP_counters.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------------------------- +# Spec tests +# --------------------------- + +add_test( + NAME test_num_xcds_spec_class + COMMAND + ${Python3_EXECUTABLE} -m pytest -m num_xcds_spec_class + --junitxml=tests/test_num_xcds_spec_class.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_num_xcds_cli_output + COMMAND + ${Python3_EXECUTABLE} -m pytest -m num_xcds_cli_output + --junitxml=tests/test_num_xcds_cli_output.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------------------------- +# DB Connector tests +# --------------------------- + +add_test( + NAME test_db_connector + COMMAND ${Python3_EXECUTABLE} -m pytest --junitxml=tests/test_db_connector.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_db_connector.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------------------------- +# Utils tests +# --------------------------- + +add_test( + NAME test_utils + COMMAND ${Python3_EXECUTABLE} -m pytest --junitxml=tests/test_utils.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_utils.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------- +# Install +# --------- + +# top-level rocprofiler-compute utility +install( + PROGRAMS src/${EXECUTABLE_NAME} + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main) +# python dependency requirements +install( + FILES requirements.txt + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main) +# support files and version info +install( + FILES src/argparser.py src/config.py src/rocprof_compute_base.py src/roofline.py + VERSION VERSION.sha + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main) +# src/rocprof_compute_analyze +install( + DIRECTORY src/rocprof_compute_analyze + DESTINATION 
${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main + PATTERN src/rocprof_compute_analyze/tests EXCLUDE + PATTERN "__pycache__" EXCLUDE) +# src/utils +install( + DIRECTORY src/utils + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main + PATTERN "rooflines*" EXCLUDE + PATTERN "__pycache__" EXCLUDE) +# src/utils/rooflines +file(GLOB rooflinebins src/utils/rooflines/roofline-*) +install( + PROGRAMS ${rooflinebins} + DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT main) +# src/rocprof_compute_soc +install( + DIRECTORY src/rocprof_compute_soc + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main + PATTERN "__pycache__" EXCLUDE) +# src/rocprof_compute_profile +install( + DIRECTORY src/rocprof_compute_profile + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main + PATTERN "__pycache__" EXCLUDE) +# src/rocprof_compute_tui +install( + DIRECTORY src/rocprof_compute_tui + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main + PATTERN "__pycache__" EXCLUDE) +# grafana assets +install( + DIRECTORY grafana + DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} + COMPONENT main) +# samples +install( + DIRECTORY sample + DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} + COMPONENT main + FILES_MATCHING + PATTERN "*.hip" + PATTERN "*.h" + PATTERN "*.cpp" + PATTERN "workloads" EXCLUDE) +# modulefile +install( + FILES ${PROJECT_BINARY_DIR}/${MOD_INSTALL_PATH}/${ROCPROFCOMPUTE_FULL_VERSION}.lua + DESTINATION ${MOD_INSTALL_PATH} + COMPONENT main) + +# top-level symlink for bin/rocprof-compute +install( + CODE "execute_process( + COMMAND bash -c \"set -e + cd \$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX} + ln -sf ../${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}/${EXECUTABLE_NAME} ${CMAKE_INSTALL_BINDIR}/${EXECUTABLE_NAME} + \")" + COMPONENT main) + +# License header update(s) +add_custom_target( + license + COMMAND + ${PROJECT_SOURCE_DIR}/utils/update_license.py --source ${PROJECT_SOURCE_DIR}/src + 
--license ${PROJECT_SOURCE_DIR}/LICENSE --extension '.py' + COMMAND + ${PROJECT_SOURCE_DIR}/utils/update_license.py --source ${PROJECT_SOURCE_DIR} + --license ${PROJECT_SOURCE_DIR}/LICENSE --file + "src/${PACKAGE_NAME},cmake/Dockerfile,cmake/rocm_install.sh,docker/docker-entrypoint.sh,src/rocprof_compute_analyze/convertor/mongodb/convert" + ) + +# Standalone binary creation +add_custom_target( + standalonebinary + # Change working directory to src + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/src + # Check nuitka + COMMAND ${Python3_EXECUTABLE} -m pip list | grep -i nuitka > /dev/null 2>&1 + # Check patchelf + COMMAND ${Python3_EXECUTABLE} -m pip list | grep -i patchelf > /dev/null 2>&1 + # Create VERSION.sha file + COMMAND git -C ${PROJECT_SOURCE_DIR} rev-parse HEAD > VERSION.sha + # Build standalone binary + # NOTE: --no-deployment-flag=self-execution is used to avoid self-execution and fork + # bombs as explained in + # https://nuitka.net/user-documentation/common-issue-solutions.html#fork-bombs-self-execution + COMMAND + ${Python3_EXECUTABLE} -m nuitka --mode=onefile --no-deployment-flag=self-execution + --include-data-files=${PROJECT_SOURCE_DIR}/VERSION*=./ --enable-plugin=no-qt + --include-package=dash_svg --include-package-data=dash_svg + --include-package=dash_bootstrap_components + --include-package-data=dash_bootstrap_components --include-package=plotly + --include-package-data=plotly --include-package=kaleido + --include-package-data=kaleido --include-package=rocprof_compute_analyze + --include-package-data=rocprof_compute_analyze + --include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc + --include-package=utils --include-package-data=utils rocprof-compute + # Remove library rpath from executable + COMMAND patchelf --remove-rpath rocprof-compute.bin + # Move to build directory + COMMAND mv rocprof-compute.bin ${CMAKE_BINARY_DIR}) + +install( + FILES ${PROJECT_SOURCE_DIR}/LICENSE + DESTINATION ${CMAKE_INSTALL_DOCDIR} + COMPONENT 
main) + +# TEST collateral +option(INSTALL_TESTS "Build test suite" OFF) +if(INSTALL_TESTS) + install( + DIRECTORY tests + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT tests + FILES_MATCHING + PATTERN "*.py" + PATTERN "__pycache__" EXCLUDE) + install( + FILES requirements-test.txt + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT tests) + install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.cmake + COMPONENT tests + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}) +endif() +message(STATUS "Install tests: ${INSTALL_TESTS}") + +# ---------- +# Packaging +# ---------- + +message(STATUS "Packaging config...") +set(CPACK_GENERATOR + "DEB" "RPM" + CACHE STRING "") +set(CPACK_PACKAGE_NAME + "${PROJECT_NAME}" + CACHE STRING "") +set(CPACK_PACKAGE_CONTACT "https://github.com/ROCm/rocprofiler-compute") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY + "ROCm Compute Profiler: tool for GPU performance profiling") +set(CPACK_RPM_PACKAGE_DESCRIPTION + "ROCm Compute Profiler is a performance analysis tool for profiling +machine learning/HPC workloads running on AMD GPUs.") +set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") + +# Package versioning +set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) +set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR}) +set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH}) +set(CPACK_PACKAGE_VERSION + "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}" + ) + +# RPM package specific variables +set(CPACK_RPM_PACKAGE_LICENSE "MIT") +set(CPACK_RPM_COMPONENT_INSTALL ON) +set(CPACK_RPM_PACKAGE_RELEASE_DIST ON) +set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") +set(CPACK_RPM_SPEC_MORE_DEFINE "%undefine __brp_mangle_shebangs") + +if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX) + set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}") +endif() + +# Debian package specific variables +set(CPACK_DEBIAN_PACKAGE_LICENSE "MIT") 
+set(CPACK_DEB_COMPONENT_INSTALL ON) +set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") + +# Dependencies +set(PACKAGE_REQUIRES + "rocprofiler" + CACHE STRING "Package dependencies") +set(CPACK_RPM_PACKAGE_REQUIRES ${PACKAGE_REQUIRES}) +set(CPACK_DEBIAN_PACKAGE_DEPENDS ${PACKAGE_REQUIRES}) + +# Handle the project rebranding from omniperf to rocprofiler-compute +set(OMNIPERF_PACKAGE_NAME "omniperf") +set(CPACK_RPM_PACKAGE_PROVIDES ${OMNIPERF_PACKAGE_NAME}) +set(CPACK_RPM_PACKAGE_OBSOLETES "${OMNIPERF_PACKAGE_NAME} < 3.0.0") +set(CPACK_RPM_PACKAGE_CONFLICTS ${OMNIPERF_PACKAGE_NAME}) + +set(CPACK_DEBIAN_PACKAGE_PROVIDES ${OMNIPERF_PACKAGE_NAME}) +set(CPACK_DEBIAN_PACKAGE_REPLACES ${OMNIPERF_PACKAGE_NAME}) +set(CPACK_DEBIAN_PACKAGE_BREAKS ${OMNIPERF_PACKAGE_NAME}) + +# Disable automatic dependency generation +set(CPACK_RPM_PACKAGE_AUTOREQPROV OFF) +set(CPACK_RPM_PACKAGE_AUTOREQ OFF) +set(CPACK_RPM_PACKAGE_AUTOPROV OFF) + +if(INSTALL_TESTS) + set(CPACK_RPM_TESTS_PACKAGE_REQUIRES ${CPACK_PACKAGE_NAME}) + set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS ${CPACK_PACKAGE_NAME}) +endif() + +# ----- Check for packaging override ----- +if(DEFINED ENV{ROCM_LIBPATCH_VERSION}) + set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}") +endif() + +if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) + set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE}) +else() + set(CPACK_RPM_PACKAGE_RELEASE "local") +endif() + +if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) +else() + set(CPACK_DEBIAN_PACKAGE_RELEASE "local") +endif() + +# Log package info +message(STATUS " Package Name: ${CPACK_PACKAGE_NAME}") +message(STATUS " Package Version: ${CPACK_PACKAGE_VERSION}") +message(STATUS " RPM Package Release: ${CPACK_RPM_PACKAGE_RELEASE}") +message(STATUS " Debian Package Release: ${CPACK_DEBIAN_PACKAGE_RELEASE}") +message(STATUS " Packaging Install Prefix: ${CPACK_PACKAGING_INSTALL_PREFIX}") +message(STATUS " Install 
Tests: ${INSTALL_TESTS}") +message(STATUS " Package Dependencies: ${PACKAGE_REQUIRES}") +message(STATUS " CPack Generator: ${CPACK_GENERATOR}") + +# Source tarball +set(CPACK_SOURCE_GENERATOR "TGZ") +set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CMAKE_PROJECT_NAME}-${FULL_VERSION_STRING}) +set(CPACK_SOURCE_IGNORE_FILES + ".*~$" + \.git/ + \.github + \.gitmodules + \.gitignore + /tests + /build) + +include(CPack) diff --git a/projects/rocprofiler-compute/CONTRIBUTING.md b/projects/rocprofiler-compute/CONTRIBUTING.md new file mode 100644 index 0000000000..de7f57b890 --- /dev/null +++ b/projects/rocprofiler-compute/CONTRIBUTING.md @@ -0,0 +1,59 @@ +## How to fork from us + +To keep our development fast and conflict free, we recommend you to [fork](https://github.com/ROCm/rocprofiler-compute/fork) our repository and start your work from our `develop` branch in your private repository. + +Afterwards, git clone your repository to your local machine. But that is not it! To keep track of the original develop repository, add it as another remote. + +``` +git remote add mainline https://github.com/ROCm/rocprofiler-compute.git +git checkout develop +``` + +As always in git, start a new branch with + +``` +git checkout -b topic- +``` + +and apply your changes there. For more help reference GitHub's ['About Forking'](https://docs.github.com/en/get-started/exploring-projects-on-github/contributing-to-a-project) page. + +## How to contribute to ROCm Compute Profiler + +### Did you find a bug? + +- Ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/ROCm/rocprofiler-compute/issues). + +- If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/ROCm/rocprofiler-compute/issues/new). + +### Did you write a patch that fixes a bug? + +- Open a new GitHub [pull request](https://github.com/ROCm/rocprofiler-compute/compare) with the patch. 
+ +- Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it. + +- Ensure the PR is based on the `develop` branch of the ROCm Compute Profiler GitHub repository. + +> [!TIP] +> To ensure you meet all formatting requirements before publishing, we recommend you utilize our included [*pre-commit hooks*](https://pre-commit.com/#introduction). For more information on how to use pre-commit hooks please see the [section below](#using-pre-commit-hooks). + +## Using pre-commit hooks + +Our project supports optional [*pre-commit hooks*](https://pre-commit.com/#introduction) which developers can leverage to verify formatting before publishing their code. Once enabled, any commits you propose to the repository will be automatically checked for formatting. Initial setup is as follows: + +```console +python3 -m pip install pre-commit +cd rocprofiler-compute +pre-commit install +``` + +Now, when you commit code to the repository you should see something like this: + +![A screen capture showing terminal output from a pre-commit hook](docs/data/contributing/pre-commit-hook.png) + +Please see the [pre-commit documentation](https://pre-commit.com/#quick-start) for additional information. + +## Coding guidelines + +Below are some repository-specific guidelines which are followed throughout the repository. +Any future contributions should adhere to these guidelines: +* Use the `pathlib` library functions instead of `os.path` for manipulating the file paths. diff --git a/projects/rocprofiler-compute/LICENSE b/projects/rocprofiler-compute/LICENSE new file mode 100644 index 0000000000..7a7c12c897 --- /dev/null +++ b/projects/rocprofiler-compute/LICENSE @@ -0,0 +1,44 @@ +MIT License + +Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +This application uses the following dependencies and their usage is governed by their respective licenses +Python 3 standard library: PSFL +astunparse python library: PSFL +colorlover python library: MIT +dash python library: MIT +dash-bootstrap-components python library: MIT +dash-svg python library: MIT +kaleido python library: MIT +matplotlib python library: PSFL +Nuitka specific runtime code: Apache 2.0 license +numpy python library: BSD +pandas python library: BSD +plotext python library: MIT +plotille python library: MIT +pymongo python library: Apache 2.0 license +pyyaml python library: MIT +setuptools python library: MIT +tabulate python library: MIT +textual python library: MIT +textual_plotext python library: MIT +textual-fspicker python library: MIT +tqdm python library: MIT diff --git a/projects/rocprofiler-compute/MAINTAINERS.md b/projects/rocprofiler-compute/MAINTAINERS.md new file mode 100644 index 0000000000..4e224fd3f3 --- /dev/null +++ b/projects/rocprofiler-compute/MAINTAINERS.md @@ -0,0 +1,29 @@ +# Maintainers Guide to ROCm Compute Profiler + +## Publishing a release + +Before publishing a new ROCm Compute Profiler release, please review this checklist to ensure all prerequisites are met: + +1) **Ensure [VERSION](VERSION) file is updated** to reflect your desired release version. +2) **Sync `amd-mainline` with `amd-staging`**. Unless major changes were introduced, you should be able to merge using the fast-forward only strategy. +3) **Update [CHANGES](CHANGES)** to reflect all major modifications to the codebase since the last release. When modifying [CHANGES](CHANGES) please ensure formatting is consistent with the rest of the ROCm software stack. See [this template](https://github.com/ROCm/hipTensor/blob/develop/CHANGELOG.md) for reference. +4) **Confirm all CI tests are passing**. You can easily confirm this by peeking the passing status of all GitHub continuous integration tests. +5) **Create a tag from `amd-mainline`**. 
More information on tagging can be found at [Git Docs - Tagging](https://git-scm.com/book/en/v2/Git-Basics-Tagging). + +> [!NOTE] +Note: A successful tag should trigger the [packaging action](.github/workflows/packaging.yml) which will produce a tarball artifact. **This artifact needs to be included as an asset in your release**. The [packaging action](.github/workflows/packaging.yml) will automatically upload the artifact and generate release notes to the corresponding tag. + +Once you've completed the above checklist, you are ready to publish your release. Please ensure you follow formatting from [past ROCm Compute Profiler releases](https://github.com/ROCm/rocprofiler-compute/releases) for consistency. Some important aspects of our release formatting include: + +- Date of release is included in "Release Title". +- Updates are called out in "Release Description". Updates should mirror those listed in [CHANGES](CHANGES). +- Links to documentation and associated release tarball are called out in "Release Description". +- The tarball artifact from the corresponding tag is added to "Release Assets". + +### Publishing a release for ROCm + +If you are preparing for a new ROCm release, note that the [rocm-ci](https://github.com/rocm-ci) bot managed by DevOps will be triggering a tag automatically. This tag will follow the format `rocm-X.X.X`. + +Traditionally, we will bump the ROCm Compute Profiler [VERSION](VERSION) with a new ROCm release. When we bump the version and reach the prerequisite step (5) above, try tagging with `vX.X.X` to validate the release tarball generated by the [packaging action](.github/workflows/packaging.yml). + +In addition to the prerequisites mentioned above, please make sure that all changes have been merged from `amd-staging` -> `release/rocm-rel-X.X.X` to ensure that the "rocm-ci" bot will capture all your changes. It is easiest to file a single pull request ahead of the ROCm release. 
diff --git a/projects/rocprofiler-compute/README.md b/projects/rocprofiler-compute/README.md new file mode 100644 index 0000000000..4ef6fbe4e3 --- /dev/null +++ b/projects/rocprofiler-compute/README.md @@ -0,0 +1,106 @@ +[![Ubuntu 22.04](https://github.com/ROCm/rocprofiler-compute/actions/workflows/ubuntu-jammy.yml/badge.svg)](https://github.com/ROCm/rocprofiler-compute/actions/workflows/ubuntu-jammy.yml) +[![RHEL 8](https://github.com/ROCm/rocprofiler-compute/actions/workflows/rhel-8.yml/badge.svg)](https://github.com/ROCm/rocprofiler-compute/actions/workflows/rhel-8.yml) +[![Instinct](https://github.com/ROCm/rocprofiler-compute/actions/workflows/mi-rhel9.yml/badge.svg)](https://github.com/ROCm/rocprofiler-compute/actions/workflows/mi-rhel9.yml) +[![Docs](https://github.com/ROCm/rocprofiler-compute/actions/workflows/docs.yml/badge.svg)](https://rocm.github.io/rocprofiler-compute/) +[![DOI](https://zenodo.org/badge/561919887.svg)](https://zenodo.org/badge/latestdoi/561919887) + +# ROCm Compute Profiler + +## General + +ROCm Compute Profiler is a system performance profiling tool for machine +learning/HPC workloads running on AMD MI GPUs. The tool presently +targets usage on MI100, MI200, and MI300 accelerators. + +* For more information on available features, installation steps, and +workload profiling and analysis, please refer to the online +[documentation](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/). + +* ROCm Compute Profiler is an AMD open source research project and is not supported +as part of the ROCm software stack. We welcome contributions and +feedback from the community. Please see the +[CONTRIBUTING.md](CONTRIBUTING.md) file for additional details on our +contribution process. + +* Licensing information can be found in the [LICENSE](LICENSE) file. + +## Development + +ROCm Compute Profiler follows a +[main-dev](https://nvie.com/posts/a-successful-git-branching-model/) +branching model. 
As a result, our latest stable release is shipped +from the `amd-mainline` branch, while new features are developed in our +`develop` branch. + +Users may check out `amd-staging` to preview upcoming features. + +## Testing + +Populate the empty variables in `Dockerfile.customrocmtest` based on the latest CI build information. + +To quickly get the environment (bash shell) for building and testing, run the following commands: +* `cd docker` +* `docker compose -f docker-compose.customrocmtest.yml up --force-recreate -d && docker attach docker-customrocmtest-1` + +Inside the docker container, clean, build and install the project with tests enabled: +``` +rm -rf build install && cmake -B build -D CMAKE_INSTALL_PREFIX=install -D ENABLE_TESTS=ON -D INSTALL_TESTS=ON -DENABLE_COVERAGE=ON -S . && cmake --build build --target install --parallel 8 +``` + +Note that per the above command, build assets will be stored under the `build` directory and installed assets will be stored under the `install` directory. + +Then, to run the automated test suite, run the following command: +``` +ctest +``` + +For manual testing, you can find the executable at `install/bin/rocprof-compute` + +NOTE: This Dockerfile uses `ubuntu 22.04` as the base operating system image + +## Standalone binary + +To create a standalone binary, run the following commands: +* `cd docker` +* `docker compose -f docker-compose.standalone.yml up --force-recreate -d && docker attach docker-standalone-1` + +You should find the rocprof-compute.bin standalone binary inside the `build` folder in the root directory of the project. + +To build the binary we follow these steps: +* Use the RHEL 8 image used to build ROCm as the base image +* Install python3.8 +* Install dependencies for runtime and for making the standalone binary +* Call the make target which uses Nuitka to build the standalone binary + +NOTE: Since RHEL 8 ships with glibc version 2.28, this standalone binary can only be run in environments with glibc version 2.28 or newer. 
+The glibc version can be checked using the `ldd --version` command. + +NOTE: libnss3.so shared library is required when using --roof-only option which generates roofline data in PDF format + +To test the standalone binary provide the `--call-binary` option to pytest. + +## How to Cite + +This software can be cited using a Zenodo +[DOI](https://doi.org/10.5281/zenodo.7314631) reference. A BibTeX +style reference is provided below for convenience: + +``` +@software{xiaomin_lu_2022_7314631, + author = {Xiaomin Lu and + Cole Ramos and + Fei Zheng and + Karl W. Schulz and + Jose Santos and + Keith Lowery and + Nicholas Curtis and + Cristian Di Pietrantonio}, + title = {ROCm/rocprofiler-compute: v3.1.0 (12 February 2025)}, + month = feb, + year = 2025, + publisher = {Zenodo}, + version = {v3.1.0}, + doi = {10.5281/zenodo.7314631}, + url = {https://doi.org/10.5281/zenodo.7314631} +} +``` diff --git a/projects/rocprofiler-compute/VERSION b/projects/rocprofiler-compute/VERSION new file mode 100644 index 0000000000..944880fa15 --- /dev/null +++ b/projects/rocprofiler-compute/VERSION @@ -0,0 +1 @@ +3.2.0 diff --git a/projects/rocprofiler-compute/cmake/Dockerfile b/projects/rocprofiler-compute/cmake/Dockerfile new file mode 100644 index 0000000000..87605794a7 --- /dev/null +++ b/projects/rocprofiler-compute/cmake/Dockerfile @@ -0,0 +1,68 @@ +##############################################################################bl +# MIT License +# +# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+##############################################################################el + +FROM ubuntu:20.04 + +USER root + +COPY rocm_install.sh /rocprofiler-compute/rocm_install.sh + +ENV PATH="/rocprofiler-compute:${PATH}" +ENV TZ="US/Chicago" +ENV DEBIAN_FRONTEND noninteractive + +#pyenv dependencies +RUN apt update && \ + apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python-openssl +ENV HOME="/rocprofiler-compute" +WORKDIR $HOME +ENV PYENV_ROOT="$HOME/.pyenv" +ENV PATH="$PYENV_ROOT/bin:$PATH" + +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ + apt update && \ + apt-get install -y cmake wget git python3-dev rpm python3-venv software-properties-common &&\ + add-apt-repository ppa:deadsnakes/ppa -y &&\ + apt install python3.7 -y libpython3.7-dev python3.7-venv libnuma-dev &&\ + curl https://pyenv.run | bash + +RUN echo "export PATH=$HOME/.pyenv/bin:$PATH" >> ~/.bashrc &&\ + echo eval "$(pyenv init -)" >> ~/.bashrc &&\ + echo eval "$(pyenv virtualenv-init -)" >> ~/.bashrc &&\ + CPPFLAGS=-I/usr/bin/openssl \ + LDFLAGS=-L/usr/lib64 \ + CONFIGURE_OPTS=--enable-shared pyenv install -v 3.8.12 &&\ + pyenv global 3.8.12 &&\ + apt-get install -y python3-pip + #clang? 
+ +RUN python3 -m pip install astunparse==1.6.2 colorlover dash matplotlib numpy pandas pymongo pyyaml tabulate tqdm dash-svg pyinstaller dash-bootstrap-components &&\ + python3 -m pip install 'cmake==3.21.4' && \ + ./rocm_install.sh &&\ + #wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ + #echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION}/ ${ROCM_REPO_DIST} main" | tee /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get dist-upgrade -y && \ + #apt-get install -y rocm-dev rocm-utils rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev hip-base hsa-amd-aqlprofile hsa-rocr-dev hsakmt-roct-dev ${EXTRA_PACKAGES} && \ + apt-get autoclean diff --git a/projects/rocprofiler-compute/cmake/VERSION.sha.in b/projects/rocprofiler-compute/cmake/VERSION.sha.in new file mode 100644 index 0000000000..def9f78935 --- /dev/null +++ b/projects/rocprofiler-compute/cmake/VERSION.sha.in @@ -0,0 +1 @@ +@ROCPROFCOMPUTE_GIT_REV@ diff --git a/projects/rocprofiler-compute/cmake/modfile.crusher.mod b/projects/rocprofiler-compute/cmake/modfile.crusher.mod new file mode 100644 index 0000000000..46d6111574 --- /dev/null +++ b/projects/rocprofiler-compute/cmake/modfile.crusher.mod @@ -0,0 +1,4 @@ +-- Crusher-specific additions +depends_on "cray-python" +depends_on "rocm" +prereq(atleast("rocm","5.2.0")) diff --git a/projects/rocprofiler-compute/cmake/modfile.thera.mod b/projects/rocprofiler-compute/cmake/modfile.thera.mod new file mode 100644 index 0000000000..5e5cb5fdfe --- /dev/null +++ b/projects/rocprofiler-compute/cmake/modfile.thera.mod @@ -0,0 +1,6 @@ +-- Thera-specific additions +depends_on "python" +depends_on "rocm" +prereq(atleast("rocm","5.2.0")) +local home = os.getenv("HOME") +setenv("MPLCONFIGDIR",pathJoin(home,".matplotlib")) diff --git a/projects/rocprofiler-compute/cmake/rocm_install.sh b/projects/rocprofiler-compute/cmake/rocm_install.sh new file mode 100755 index 0000000000..377ad53ad5 --- /dev/null +++ 
b/projects/rocprofiler-compute/cmake/rocm_install.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +##############################################################################bl +# MIT License +# +# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+##############################################################################el + +declare -a rocm_versions=("4.3.1" "4.5.2" "5.0.2" "5.1.3" "5.2.3") # ROCm releases processed by the loop below +wget https://repo.radeon.com/amdgpu-install/22.10/ubuntu/focal/amdgpu-install_22.10.50100-1_all.deb +apt-get install -y ./amdgpu-install_22.10.50100-1_all.deb +for rocm_version in "${rocm_versions[@]}"; do # quoted so each array element stays exactly one word + echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$rocm_version ubuntu main" | tee /etc/apt/sources.list.d/rocm.list + apt update + amdgpu-install -y --usecase=rocm --rocmrelease="$rocm_version" --no-dkms +done diff --git a/projects/rocprofiler-compute/cmake/rocprofcompute.lua.in b/projects/rocprofiler-compute/cmake/rocprofcompute.lua.in new file mode 100644 index 0000000000..a085c20d88 --- /dev/null +++ b/projects/rocprofiler-compute/cmake/rocprofcompute.lua.in @@ -0,0 +1,31 @@ +local help_message = [[ + +ROCm Compute Profiler is an open-source performance analysis tool for profiling +machine learning/HPC workloads running on AMD MI GPUs. 
+ +Version @ROCPROFCOMPUTE_FULL_VERSION@ +]] + +help(help_message,"\n") + +whatis("Name: @PROJECT_NAME@") +whatis("Version: @ROCPROFCOMPUTE_FULL_VERSION@") +whatis("Keywords: Profiling, Performance, GPU") +whatis("Description: tool for GPU performance profiling") +whatis("URL: https://github.com/ROCm/rocprofiler-compute") + +-- Export environmental variables +local topDir="@CMAKE_INSTALL_PREFIX@" +local binDir="@CMAKE_INSTALL_FULL_BINDIR@" +local shareDir="@CMAKE_INSTALL_FULL_DATADIR@" +local pythonDeps="@PYTHON_DEPS@" + +setenv("ROCPROFCOMPUTE_DIR",topDir) +setenv("ROCPROFCOMPUTE_BIN",binDir) +setenv("ROCPROFCOMPUTE_SHARE",shareDir) + +-- Update relevant PATH variables +prepend_path("PATH",binDir) +if ( pythonDeps ~= "" ) then + prepend_path("PYTHONPATH",pythonDeps) +end diff --git a/projects/rocprofiler-compute/docker/Dockerfile.customrocmtest b/projects/rocprofiler-compute/docker/Dockerfile.customrocmtest new file mode 100644 index 0000000000..7efc607c28 --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.customrocmtest @@ -0,0 +1,49 @@ +# Use a base image +FROM ubuntu:22.04 + +# Set the working directory +WORKDIR /app + +# Update package list and install prerequisites +RUN apt-get update && apt-get install -y \ + software-properties-common cmake locales git curl \ + && add-apt-repository ppa:deadsnakes/ppa \ + && apt-get update + +# Allows running git commands in /app +RUN git config --global --add safe.directory /app + +# Generate the desired locale +RUN locale-gen en_US.UTF-8 + +# Install Python 3.10 and pip +RUN apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip libsqlite3-dev + +# Update pip +RUN apt remove -y python3-wheel +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py +RUN python3.10 get-pip.py +RUN python3.10 -m pip install --upgrade pip setuptools wheel + +# Set Python 3.10 as the default python3 +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 + +# Remove blinker python 
package +RUN apt-get remove -y python3-blinker + +# Install rocm +# Define custom version +ARG DEB_FILE="" +ARG AMDGPU_BUILD="" +ARG ROCM_BUILD="" +RUN curl -O "https://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/${DEB_FILE}" +RUN apt-get install -y "./${DEB_FILE}" +RUN amdgpu-repo --amdgpu-build="${AMDGPU_BUILD}" --rocm-build="compute-rocm-dkms-no-npi-hipclang/${ROCM_BUILD}" +RUN DEBIAN_FRONTEND=noninteractive TZ="America/Toronto" amdgpu-install --yes --usecase=rocm + +# Install any dependencies specified in requirements.txt +# Run interactive bash shell +CMD ["/bin/bash", "-c", "\ + python3.10 -m pip install -r requirements.txt -r requirements-test.txt \ + && exec /bin/bash \ +"] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.doctest b/projects/rocprofiler-compute/docker/Dockerfile.doctest new file mode 100644 index 0000000000..8b68b54337 --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.doctest @@ -0,0 +1,27 @@ +# Use a base image +FROM rocm/dev-ubuntu-22.04 + +# Set the working directory +WORKDIR /app + +# Update package list and install prerequisites +RUN apt-get update && apt-get install -y \ + software-properties-common cmake locales git \ + && add-apt-repository ppa:deadsnakes/ppa \ + && apt-get update + +# Allows running git commands in /app +RUN git config --global --add safe.directory /app + +# Install Python 3.10 and pip + RUN apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip + +# Set Python 3.10 as the default python3 + RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 + +# Install any dependencies specified in requirements.txt +# Run interactive bash shell + CMD ["/bin/bash", "-c", "\ + python3 -m pip install -r docs/sphinx/requirements.txt \ + && exec /bin/bash \ + "] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.opensuse b/projects/rocprofiler-compute/docker/Dockerfile.opensuse new file mode 100644 index 0000000000..209d19da1c --- /dev/null +++ 
b/projects/rocprofiler-compute/docker/Dockerfile.opensuse @@ -0,0 +1,54 @@ +ARG DISTRO=opensuse/leap +ARG VERSION=15.3 +FROM ${DISTRO}:${VERSION} + +ENV HOME /root +ENV SHELL /bin/bash +ENV BASH_ENV /etc/bash.bashrc +ENV DEBIAN_FRONTEND noninteractive + +WORKDIR /tmp +SHELL [ "/bin/bash", "-c" ] + +ENV PATH /usr/local/bin:${PATH} +ENV LIBRARY_PATH ${LIBRARY_PATH}:/opt/amdgpu/lib64 + +RUN set +e; \ + zypper --non-interactive -i --gpg-auto-import-keys refresh; \ + zypper --non-interactive -i patch; \ + zypper --non-interactive -i patch; \ + zypper --non-interactive -i --gpg-auto-import-keys refresh; \ + exit 0 + +RUN zypper --non-interactive update -y && \ + zypper --non-interactive dist-upgrade -y && \ + zypper --non-interactive install -y -t pattern devel_basis && \ + zypper --non-interactive install -y python3-pip gcc-c++ git dpkg-devel rpm-build wget curl binutils-gold && \ + python3 -m pip install 'cmake==3.28.4' + +ARG ROCM_VERSION=0.0 +ARG AMDGPU_RPM=latest/sle/15/amdgpu-install-21.50.50000-1.noarch.rpm +ARG PERL_REPO=SLE_15 + +RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ + zypper --non-interactive addrepo https://mirrorcache-us.opensuse.org/repositories/devel:/languages:/perl/${PERL_REPO}/devel:languages:perl.repo && \ + zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ + zypper --non-interactive --gpg-auto-import-keys refresh && \ + zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \ + zypper --non-interactive clean --all; \ + fi + +ARG PYTHON_VERSIONS="6 7 8 9 10 11 12" + +RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH="/opt/conda/bin:${PATH}" && \ + conda config --set always_yes yes --set changeps1 no && \ + conda update -c defaults -n base conda && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c 
conda-forge python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ + conda clean -a -y && \ + conda init + +WORKDIR /home +SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.opensuse.ci b/projects/rocprofiler-compute/docker/Dockerfile.opensuse.ci new file mode 100644 index 0000000000..5dc94ef055 --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.opensuse.ci @@ -0,0 +1,48 @@ +ARG DISTRO=opensuse/leap +ARG VERSION=15.3 +FROM ${DISTRO}:${VERSION} + +ENV HOME /root +ENV SHELL /bin/bash +ENV BASH_ENV /etc/bash.bashrc +ENV DEBIAN_FRONTEND noninteractive + +WORKDIR /tmp +SHELL [ "/bin/bash", "-c" ] + +ENV PATH /usr/local/bin:${PATH} + +ARG EXTRA_PACKAGES="" +# ARG ELFUTILS_DOWNLOAD_VERSION="0.186" +# ARG NJOBS="12" + +RUN set +e; \ + zypper --non-interactive -i --gpg-auto-import-keys refresh; \ + zypper --non-interactive -i patch; \ + zypper --non-interactive -i patch; \ + zypper --non-interactive -i --gpg-auto-import-keys refresh; \ + exit 0 + +RUN zypper --non-interactive update -y && \ + zypper --non-interactive dist-upgrade -y && \ + zypper --non-interactive install -y -t pattern devel_basis && \ + zypper --non-interactive install -y python3-pip gcc-c++ git dpkg-devel rpm-build curl wget binutils-gold && \ + python3 -m pip install 'cmake==3.28.4' && \ + zypper --non-interactive clean --all + +ARG PYTHON_VERSIONS="6 7 8 9 10 11 12" + +RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH="/opt/conda/bin:${PATH}" && \ + conda config --set always_yes yes --set changeps1 no && \ + conda update -c defaults -n base conda && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m 
pip install numpy pandas dataclasses; done && \ + conda clean -a -y && \ + cd /tmp && \ + shopt -s dotglob extglob && \ + rm -rf * + +WORKDIR /home +SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.rhel b/projects/rocprofiler-compute/docker/Dockerfile.rhel new file mode 100644 index 0000000000..98225b7b6a --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.rhel @@ -0,0 +1,52 @@ +ARG DISTRO=rockylinux +ARG VERSION=8 +FROM ${DISTRO}:${VERSION} + +ENV HOME /root +ENV SHELL /bin/bash +ENV BASH_ENV /etc/bash.bashrc +ENV DEBIAN_FRONTEND noninteractive + +WORKDIR /tmp +SHELL [ "/bin/bash", "-c" ] + +ENV PATH /usr/local/bin:${PATH} +ENV LIBRARY_PATH ${LIBRARY_PATH}:/opt/amdgpu/lib64 + +RUN yum groupinstall -y "Development Tools" && \ + yum install -y epel-release && \ + yum install -y --allowerasing curl dpkg-devel python3-pip wget zlib-devel which && \ + yum clean all && \ + python3 -m pip install 'cmake==3.28.4' + +ARG ROCM_VERSION=0.0 +ARG AMDGPU_RPM=5.4/rhel/8.7/amdgpu-install-5.4.50400-1.el8.noarch.rpm + +RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ + OS_VERSION_MAJOR=$(cat /etc/os-release | grep 'VERSION_ID' | sed 's/=/ /1' | awk '{print $NF}' | sed 's/"//g' | sed 's/\./ /g' | awk '{print $1}') && \ + if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \ + dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \ + yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ + yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \ + yum clean all; \ + fi + +ARG PYTHON_VERSIONS="6 7 8 9 10 11 12" + +RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH="/opt/conda/bin:${PATH}" && \ + conda config --set always_yes yes --set changeps1 no && \ + conda update -c defaults -n base conda && \ + for i in ${PYTHON_VERSIONS}; do 
conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ + conda clean -a -y && \ + conda init + +RUN if [ "${ROCM_VERSION}" != "0.0" ]; then ln -sf /opt/rocm-${ROCM_VERSION}* /opt/rocm; fi + +WORKDIR /home +ENV LC_ALL C.UTF-8 +SHELL [ "/bin/bash", "--login", "-c" ] +COPY ./entrypoint-rhel.sh /docker-entrypoint.sh +ENTRYPOINT [ "/docker-entrypoint.sh" ] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.rhel.ci b/projects/rocprofiler-compute/docker/Dockerfile.rhel.ci new file mode 100644 index 0000000000..c07738fe92 --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.rhel.ci @@ -0,0 +1,42 @@ + +ARG DISTRO=rockylinux +ARG VERSION=8 +FROM ${DISTRO}:${VERSION} + +ENV HOME /root +ENV SHELL /bin/bash +ENV BASH_ENV /etc/bash.bashrc +ENV DEBIAN_FRONTEND noninteractive + +WORKDIR /tmp +SHELL [ "/bin/bash", "-c" ] + +ENV PATH /usr/local/bin:${PATH} + +ARG EXTRA_PACKAGES="" +# ARG ELFUTILS_DOWNLOAD_VERSION="0.186" +# ARG NJOBS="12" + +RUN yum groupinstall -y "Development Tools" && \ + yum install -y epel-release && \ + yum install -y --allowerasing curl dpkg-devel python3-pip wget zlib-devel which git && \ + yum clean all && \ + python3 -m pip install --upgrade pip && \ + python3 -m pip install 'cmake==3.28.4' + +ARG PYTHON_VERSIONS="6 7 8 9 10 11 12" + +RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH="/opt/conda/bin:${PATH}" && \ + conda config --set always_yes yes --set changeps1 no && \ + conda update -c defaults -n base conda && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy pandas dataclasses; done && \ + conda clean -a -y && \ + cd 
/tmp && \ + shopt -s dotglob extglob && \ + rm -rf * + +WORKDIR /home +SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.standalone b/projects/rocprofiler-compute/docker/Dockerfile.standalone new file mode 100644 index 0000000000..fe498476d7 --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.standalone @@ -0,0 +1,22 @@ +FROM redhat/ubi8:8.10-1184 + +WORKDIR /app + +RUN yum install -y curl gcc cmake git + +# Allows running git commands in /app +RUN git config --global --add safe.directory /app + +RUN yum install -y python38 python38-devel && \ + yum clean all && \ + rm -rf /var/cache/yum && \ + curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.8 get-pip.py + +CMD ["/bin/bash", "-c", "\ + python3.8 -m pip install -r requirements.txt \ + && python3.8 -m pip install nuitka patchelf \ + && rm -rf build \ + && cmake -B build -S . \ + && make -C build standalonebinary \ +"] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.ubuntu b/projects/rocprofiler-compute/docker/Dockerfile.ubuntu new file mode 100644 index 0000000000..5ada1a306d --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.ubuntu @@ -0,0 +1,57 @@ +ARG DISTRO +ARG VERSION +FROM ${DISTRO}:${VERSION} + +ENV HOME /root +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US +ENV LC_ALL C +ENV SHELL /bin/bash +ENV BASH_ENV /etc/bash.bashrc +ENV DEBIAN_FRONTEND noninteractive + +WORKDIR /tmp +SHELL [ "/bin/bash", "-c" ] + +ARG EXTRA_PACKAGES="" +ARG ROCM_REPO_VERSION="debian" +ARG ROCM_VERSION="0.0" +ARG ROCM_REPO_DIST="ubuntu" +ARG PYTHON_VERSIONS="6 7 8 9 10 11 12" +ENV PATH ${HOME}/.local/bin:${PATH} + +RUN apt-get update && \ + apt-get dist-upgrade -y && \ + apt-get install -y build-essential cmake libnuma1 wget gnupg2 m4 bash-completion git-core autoconf libtool autotools-dev python3-pip lsb-release libpapi-dev libpfm4-dev libudev1 libopenmpi-dev rpm librpm-dev curl apt-utils && \ + python3 -m pip install 
'cmake==3.28.4' + +RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ + if [ -d /etc/apt/trusted.gpg.d ]; then \ + wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/trusted.gpg.d/rocm.gpg; \ + else \ + wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -; \ + fi && \ + echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION}/ ${ROCM_REPO_DIST} main" | tee /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get dist-upgrade -y && \ + apt-get install -y hsa-amd-aqlprofile hsa-rocr-dev hsakmt-roct-dev && \ + apt-get install -y hip-base hip-runtime-amd hip-dev && \ + apt-get install -y rocm-llvm rocm-core rocm-smi-lib rocm-device-libs && \ + apt-get install -y roctracer-dev rocprofiler-dev rccl-dev ${EXTRA_PACKAGES} && \ + if [ "$(echo ${ROCM_VERSION} | awk -F '.' '{print $1}')" -lt "5" ]; then apt-get install -y rocm-dev; fi && \ + apt-get autoclean; \ + fi + +RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH="/opt/conda/bin:${PATH}" && \ + conda config --set always_yes yes --set changeps1 no && \ + conda update -c defaults -n base conda && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ + conda clean -a -y && \ + conda init + +ENV LC_ALL C.UTF-8 +WORKDIR /home +SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-compute/docker/Dockerfile.ubuntu.ci b/projects/rocprofiler-compute/docker/Dockerfile.ubuntu.ci new file mode 100644 index 0000000000..387a4f123a --- /dev/null +++ b/projects/rocprofiler-compute/docker/Dockerfile.ubuntu.ci @@ -0,0 +1,48 @@ + +ARG DISTRO +ARG VERSION +FROM ${DISTRO}:${VERSION} + +ENV HOME /root +ENV LANG C.UTF-8 +ENV SHELL /bin/bash +ENV BASH_ENV 
/etc/bash.bashrc +ENV DEBIAN_FRONTEND noninteractive + +WORKDIR /tmp +SHELL [ "/bin/bash", "-c" ] + +ARG EXTRA_PACKAGES="" +# ARG ELFUTILS_DOWNLOAD_VERSION="0.186" +# ARG BOOST_DOWNLOAD_VERSION="1.79.0" +# ARG NJOBS="12" +ARG PYTHON_VERSIONS="6 7 8 9 10 11 12" + +ENV PATH /usr/local/bin:${PATH} +ENV LIBRARY_PATH /usr/local/lib:/usr/local/lib64:${LIBRARY_PATH} +ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH} +ENV CMAKE_PREFIX_PATH /usr/local:${CMAKE_PREFIX_PATH} + + +RUN apt-get update && \ + apt-get dist-upgrade -y && \ + apt-get install -y autoconf autotools-dev bash-completion build-essential bzip2 cmake curl environment-modules git-core gnupg2 gzip libtool locales lsb-release m4 python3-pip unzip wget zip zlib1g-dev && \ + python3 -m pip install 'cmake==3.28.4' && \ + apt-get autoclean && \ + locale -a && \ + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH="/opt/conda/bin:${PATH}" && \ + conda config --set always_yes yes --set changeps1 no && \ + conda update -c defaults -n base conda && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy pandas dataclasses; done && \ + conda clean -a -y && \ + cd /tmp && \ + shopt -s dotglob extglob && \ + rm -rf * + + +ENV LC_ALL C.UTF-8 +WORKDIR /home +SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-compute/docker/build-docker-ci.sh b/projects/rocprofiler-compute/docker/build-docker-ci.sh new file mode 100755 index 0000000000..3e6cc277a8 --- /dev/null +++ b/projects/rocprofiler-compute/docker/build-docker-ci.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash + +set -e + +: ${USER:=$(whoami)} +: ${DISTRO:=ubuntu} +: ${VERSIONS:=20.04} +# : ${NJOBS=$(nproc)} +# : ${ELFUTILS_VERSION:=0.186} +# : ${BOOST_VERSION:=1.79.0} +: 
${PYTHON_VERSIONS:="6 7 8 9 10 11 12"} +: ${PUSH:=0} +: ${PULL:=--pull} + +verbose-run() +{ + echo -e "\n### Executing \"${@}\"... ###\n" + eval $@ +} + +tolower() +{ + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; +} + +toupper() +{ + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; +} + +usage() +{ + print_option() { printf " --%-20s %-24s %s\n" "${1}" "${2}" "${3}"; } + echo "Options:" + print_option "help -h" "" "This message" + print_option "push" "" "Push the container to DockerHub when completed" + print_option "no-pull" "" "Do not pull down most recent base container" + + echo "" + print_default_option() { printf " --%-20s %-24s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; } + print_default_option distro "[ubuntu|opensuse|rhel]" "OS distribution" "${DISTRO}" + print_default_option versions "[VERSION] [VERSION...]" "Ubuntu, OpenSUSE, or RHEL release" "${VERSIONS}" + print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}" + # print_default_option "jobs -j" "[N]" "parallel build jobs" "${NJOBS}" + # print_default_option elfutils-version "[0.183..0.186]" "ElfUtils version" "${ELFUTILS_VERSION}" + # print_default_option boost-version "[1.67.0..1.79.0]" "Boost version" "${BOOST_VERSION}" + print_default_option user "[USERNAME]" "DockerHub username" "${USER}" +} + +send-error() +{ + usage + echo -e "\nError: ${@}" + exit 1 +} + +reset-last() +{ + last() { send-error "Unsupported argument :: ${1}"; } +} + +reset-last + +n=0 +while [[ $# -gt 0 ]] +do + case "${1}" in + -h|--help) + usage + exit 0 + ;; + "--distro") + shift + DISTRO=${1} + last() { DISTRO="${DISTRO} ${1}"; } + ;; + "--versions") + shift + VERSIONS=${1} + last() { VERSIONS="${VERSIONS} ${1}"; } + ;; + "--python-versions") + shift + PYTHON_VERSIONS=${1} + last() { PYTHON_VERSIONS="${PYTHON_VERSIONS} ${1}"; } + ;; + --user|-u) + shift + USER=${1} + reset-last + ;; + "--push") + PUSH=1 + reset-last + ;; + "--no-pull") + PULL="" 
+ reset-last + ;; + --*) + reset-last + last ${1} + ;; + *) + last ${1} + ;; + esac + n=$((${n} + 1)) + shift +done + +DOCKER_FILE=Dockerfile.${DISTRO}.ci + +if [ ! -f ${DOCKER_FILE} ]; then cd docker; fi + +if [ ! -f ${DOCKER_FILE} ]; then + echo "Error! Execute script from source directory" + exit 1 +fi + +# verbose-run rm -rf ./dyninst-source +# verbose-run cp -r ../external/dyninst ./dyninst-source +# verbose-run rm -rf ./dyninst-source/{build,install}* + +set -e + +if [ "${DISTRO}" = "opensuse" ]; then + DISTRO_IMAGE="opensuse/leap" +elif [ "${DISTRO}" = "rhel" ]; then + DISTRO_IMAGE="rockylinux" +else + DISTRO_IMAGE=${DISTRO} +fi + +for VERSION in ${VERSIONS} +do + verbose-run docker build . \ + ${PULL} \ + -f ${DOCKER_FILE} \ + --tag ${USER}/rocprofiler-compute:ci-base-${DISTRO}-${VERSION} \ + --build-arg DISTRO=${DISTRO_IMAGE} \ + --build-arg VERSION=${VERSION} \ + --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + # --build-arg NJOBS=${NJOBS} \ + # --build-arg ELFUTILS_DOWNLOAD_VERSION=${ELFUTILS_VERSION} \ + # --build-arg BOOST_DOWNLOAD_VERSION=${BOOST_VERSION} +done + +if [ "${PUSH}" -gt 0 ]; then + for VERSION in ${VERSIONS} + do + verbose-run docker push ${USER}/rocprofiler-compute:ci-base-${DISTRO}-${VERSION} + done +fi diff --git a/projects/rocprofiler-compute/docker/build-docker.sh b/projects/rocprofiler-compute/docker/build-docker.sh new file mode 100755 index 0000000000..3b58a886c9 --- /dev/null +++ b/projects/rocprofiler-compute/docker/build-docker.sh @@ -0,0 +1,275 @@ +#!/usr/bin/env bash + +: ${USER:=$(whoami)} +: ${ROCM_VERSIONS:="5.0"} +: ${DISTRO:=ubuntu} +: ${VERSIONS:=20.04} +: ${PYTHON_VERSIONS:="6 7 8 9 10 11 12"} +: ${BUILD_CI:=""} +: ${PUSH:=0} +: ${PULL:=--pull} +: ${RETRY:=3} + +set -e + +tolower() +{ + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; +} + +toupper() +{ + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; +} + +usage() +{ + print_option() { printf " --%-20s %-24s %s\n" "${1}" "${2}" "${3}"; } + echo "Options:" 
+ print_option "help -h" "" "This message" + print_option "no-pull" "" "Do not pull down most recent base container" + + echo "" + print_default_option() { printf " --%-20s %-24s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; } + print_default_option distro "[ubuntu|opensuse|rhel]" "OS distribution" "${DISTRO}" + print_default_option versions "[VERSION] [VERSION...]" "Ubuntu, OpenSUSE, or RHEL release" "${VERSIONS}" + print_default_option rocm-versions "[VERSION] [VERSION...]" "ROCm versions" "${ROCM_VERSIONS}" + print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}" + print_default_option "user -u" "[USERNAME]" "DockerHub username" "${USER}" + print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors)" "${RETRY}" + print_default_option push "" "Push the image to Dockerhub" "" + #print_default_option lto "[on|off]" "Enable LTO" "${LTO}" +} + +send-error() +{ + usage + echo -e "\nError: ${@}" + exit 1 +} + +verbose-run() +{ + echo -e "\n### Executing \"${@}\"... ###\n" + eval "${@}" +} + +verbose-build() +{ + echo -e "\n### Executing \"${@}\" a maximum of ${RETRY} times... ###\n" + for i in $(seq 1 1 ${RETRY}) + do + set +e + eval "${@}" + local RETC=$? + set -e + if [ "${RETC}" -eq 0 ]; then + break + else + echo -en "\n### Command failed with error code ${RETC}... " + if [ "${i}" -ne "${RETRY}" ]; then + echo -e "Retrying... ###\n" + sleep 3 + else + echo -e "Exiting... 
###\n" + exit ${RETC} + fi + fi + done +} + +reset-last() +{ + last() { send-error "Unsupported argument :: ${1}"; } +} + +reset-last + +n=0 +while [[ $# -gt 0 ]] +do + case "${1}" in + -h|--help) + usage + exit 0 + ;; + "--distro") + shift + DISTRO=${1} + last() { DISTRO="${DISTRO} ${1}"; } + ;; + "--versions") + shift + VERSIONS=${1} + last() { VERSIONS="${VERSIONS} ${1}"; } + ;; + "--rocm-versions") + shift + ROCM_VERSIONS=${1} + last() { ROCM_VERSIONS="${ROCM_VERSIONS} ${1}"; } + ;; + "--python-versions") + shift + PYTHON_VERSIONS=${1} + last() { PYTHON_VERSIONS="${PYTHON_VERSIONS} ${1}"; } + ;; + --user|-u) + shift + USER=${1} + reset-last + ;; + --push) + PUSH=1 + reset-last + ;; + --no-pull) + PULL="" + reset-last + ;; + --retry|-r) + shift + RETRY=${1} + reset-last + ;; + --*) # pattern must stay unquoted: a quoted "--*" only matches the literal string, letting unknown options fall through to last() + send-error "Unsupported argument at position $((${n} + 1)) :: ${1}" + ;; + *) + last ${1} + ;; + esac + n=$((${n} + 1)) + shift +done + +DOCKER_FILE="Dockerfile.${DISTRO}" + +if [ "${RETRY}" -lt 1 ]; then + RETRY=1 +fi + +if [ -n "${BUILD_CI}" ]; then DOCKER_FILE="${DOCKER_FILE}.ci"; fi +if [ ! -f ${DOCKER_FILE} ]; then cd docker; fi +if [ ! -f ${DOCKER_FILE} ]; then send-error "File \"${DOCKER_FILE}\" not found"; fi + +for VERSION in ${VERSIONS} +do + VERSION_MAJOR=$(echo ${VERSION} | sed 's/\./ /g' | awk '{print $1}') + VERSION_MINOR=$(echo ${VERSION} | sed 's/\./ /g' | awk '{print $2}') + VERSION_PATCH=$(echo ${VERSION} | sed 's/\./ /g' | awk '{print $3}') + for ROCM_VERSION in ${ROCM_VERSIONS} + do + CONTAINER=${USER}/rocprofiler-compute:release-base-${DISTRO}-${VERSION}-rocm-${ROCM_VERSION} + ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}') + ROCM_MINOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $2}') + ROCM_PATCH=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $3}') + if [ -n "${ROCM_PATCH}" ]; then + ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100)+(${ROCM_PATCH}) )) + ROCM_SEP="."
+ else + ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) )) + ROCM_SEP="" + fi + if [ "${DISTRO}" = "ubuntu" ]; then + ROCM_REPO_DIST="ubuntu" + ROCM_REPO_VERSION=${ROCM_VERSION} + case "${ROCM_VERSION}" in + 4.1* | 4.0*) + ROCM_REPO_DIST="xenial" + ;; + 5.3 | 5.3.* | 5.4 | 5.4.* | 5.5 | 5.5.* | 5.6 | 5.6.* | 5.7 | 5.7.* | 6.*) + case "${VERSION}" in + 24.04) + ROCM_REPO_DIST="noble" + ;; + 22.04) + ROCM_REPO_DIST="jammy" + ;; + 20.04) + ROCM_REPO_DIST="focal" + ;; + 18.04) + ROCM_REPO_DIST="bionic" + ;; + *) + ;; + esac + ;; + *) + ;; + esac + echo + verbose-build docker build . ${PULL} --progress plain -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg ROCM_REPO_VERSION=${ROCM_REPO_VERSION} --build-arg ROCM_REPO_DIST=${ROCM_REPO_DIST} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + elif [ "${DISTRO}" = "rhel" ]; then + if [ -z "${VERSION_MINOR}" ]; then + send-error "Please provide a major and minor version of the OS. Supported: >= 8.7, <= 9.3" + fi + + # Components used to create the sub-URL below + # set in amdgpu-install//rhel/ + RPM_PATH=${VERSION_MAJOR}.${VERSION_MINOR} + RPM_TAG=".el${VERSION_MAJOR}" + + # set the sub-URL in https://repo.radeon.com/amdgpu-install/ + case "${ROCM_VERSION}" in + 5.3 | 5.3.* | 5.4 | 5.4.* | 5.5 | 5.5.* | 5.6 | 5.6.* | 5.7 | 5.7.* | 6.0 | 6.0.*) + ROCM_RPM=${ROCM_VERSION}/rhel/${RPM_PATH}/amdgpu-install-${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1${RPM_TAG}.noarch.rpm + ;; + 5.2 | 5.2.* | 5.1 | 5.1.* | 5.0 | 5.0.* | 4.*) # ROCm releases older than 5.3 are rejected for RHEL builds; the message mirrors the accepted range above + send-error "Invalid ROCm version ${ROCM_VERSION}. Supported: >= 5.3.0, <= 6.0.x" + ;; + 0.0) + ;; + *) + send-error "Unsupported combination :: ${DISTRO}-${VERSION} + ROCm ${ROCM_VERSION}" + ;; + esac + + # use Rocky Linux as a base image for RHEL builds + DISTRO_BASE_IMAGE=rockylinux + + verbose-build docker build .
${PULL} --progress plain -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_BASE_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + elif [ "${DISTRO}" = "opensuse" ]; then + case "${VERSION}" in + 15.*) + DISTRO_IMAGE="opensuse/leap" + echo "DISTRO_IMAGE: ${DISTRO_IMAGE}" + ;; + *) + send-error "Invalid opensuse version ${VERSION}. Supported: 15.x" + ;; + esac + case "${ROCM_VERSION}" in + 5.3 | 5.3.* | 5.4 | 5.4.* | 5.5 | 5.5.* | 5.6 | 5.6.* | 5.7 | 5.7.* | 6.0 | 6.0.*) + ROCM_RPM=${ROCM_VERSION}/sle/${VERSION}/amdgpu-install-${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1.noarch.rpm + ;; + 5.2 | 5.2.*) + ROCM_RPM=22.20${ROCM_SEP}${ROCM_PATCH}/sle/${VERSION}/amdgpu-install-22.20.${ROCM_VERSN}-1.noarch.rpm + ;; + 5.1 | 5.1.*) + ROCM_RPM=22.10${ROCM_SEP}${ROCM_PATCH}/sle/15/amdgpu-install-22.10${ROCM_SEP}${ROCM_PATCH}.${ROCM_VERSN}-1.noarch.rpm + ;; + 5.0 | 5.0.*) + ROCM_RPM=21.50${ROCM_SEP}${ROCM_PATCH}/sle/15/amdgpu-install-21.50${ROCM_SEP}${ROCM_PATCH}.${ROCM_VERSN}-1.noarch.rpm + ;; + 4.5 | 4.5.*) + ROCM_RPM=21.40${ROCM_SEP}${ROCM_PATCH}/sle/15/amdgpu-install-21.40${ROCM_SEP}${ROCM_PATCH}.${ROCM_VERSN}-1.noarch.rpm + ;; + 0.0) + ;; + *) + send-error "Unsupported combination :: ${DISTRO}-${VERSION} + ROCm ${ROCM_VERSION}" + ;; + esac + if [[ "${VERSION_MAJOR}" -le 15 && "${VERSION_MINOR}" -le 5 ]]; then + PERL_REPO="15.6" + else + PERL_REPO="${VERSION_MAJOR}.${VERSION_MINOR}" + fi + verbose-build docker build . 
${PULL} --progress plain -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PERL_REPO=${PERL_REPO} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + fi + if [ "${PUSH}" -ne 0 ]; then + docker push ${CONTAINER} + fi + done +done diff --git a/projects/rocprofiler-compute/docker/docker-compose.customrocmtest.yml b/projects/rocprofiler-compute/docker/docker-compose.customrocmtest.yml new file mode 100644 index 0000000000..614eb8ed9b --- /dev/null +++ b/projects/rocprofiler-compute/docker/docker-compose.customrocmtest.yml @@ -0,0 +1,17 @@ +services: + customrocmtest: # service name + build: + context: ../ + dockerfile: docker/Dockerfile.customrocmtest + devices: + - /dev/kfd + - /dev/dri + security_opt: + - seccomp:unconfined + volumes: + - ../:/app + - ../../rocprofiler-sdk:/rocprofiler-sdk + ports: + - 8050:8050 + tty: true + stdin_open: true diff --git a/projects/rocprofiler-compute/docker/docker-compose.doctest.yml b/projects/rocprofiler-compute/docker/docker-compose.doctest.yml new file mode 100644 index 0000000000..791ad512bb --- /dev/null +++ b/projects/rocprofiler-compute/docker/docker-compose.doctest.yml @@ -0,0 +1,14 @@ +services: + doctest: # service name + build: + context: ../ + dockerfile: docker/Dockerfile.doctest + devices: + - /dev/kfd + - /dev/dri + security_opt: + - seccomp:unconfined + volumes: + - ../:/app + tty: true + stdin_open: true diff --git a/projects/rocprofiler-compute/docker/docker-compose.standalone.yml b/projects/rocprofiler-compute/docker/docker-compose.standalone.yml new file mode 100644 index 0000000000..f02b3b276a --- /dev/null +++ b/projects/rocprofiler-compute/docker/docker-compose.standalone.yml @@ -0,0 +1,12 @@ +services: + standalone: + build: + context: ../ + dockerfile: docker/Dockerfile.standalone + devices: + - /dev/kfd + - /dev/dri + security_opt: + - seccomp:unconfined + volumes: + 
- ../:/app diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/.gitignore b/projects/rocprofiler-compute/docs/archive/docs-1.x/.gitignore new file mode 100644 index 0000000000..df638b8b20 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/.gitignore @@ -0,0 +1,5 @@ +/build* +/_build +/_doxygen +/.gitinfo +/omniperf.dox diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/.nojekyll b/projects/rocprofiler-compute/docs/archive/docs-1.x/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/Makefile b/projects/rocprofiler-compute/docs/archive/docs-1.x/Makefile new file mode 100644 index 0000000000..d4bb2cbb9e --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/README b/projects/rocprofiler-compute/docs/archive/docs-1.x/README new file mode 100644 index 0000000000..d888e91be2 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/README @@ -0,0 +1,6 @@ +This subdirectory houses the input markup for Omniperf documentation using +Sphinx. Changes committed here on the main branch will automatically be built +and pushed live using a Github action. 
+ +You can build a local copy of the documentation in this directory using +"make html" assuming you have the necessary sphinx dependencies installed. diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/VERSION b/projects/rocprofiler-compute/docs/archive/docs-1.x/VERSION new file mode 100644 index 0000000000..7ee7020b38 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/VERSION @@ -0,0 +1 @@ +1.0.10 diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/analysis.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/analysis.md new file mode 100644 index 0000000000..fb9f498269 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/analysis.md @@ -0,0 +1,770 @@ +# Analyze Mode + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` +Omniperf offers several ways to interact with the metrics it generates from profiling. The option you choose will likely be influenced by your familiarity with the profiled application, computing environment, and experience with Omniperf. + +While analyzing with the CLI offers quick and straightforward access to Omniperf metrics from the terminal, the GUI adds an extra layer of styling and interactiveness some users may prefer. + +See sections below for more information on each. + +## CLI Analysis +> Profiling results from the [aforementioned vcopy workload](https://rocm.github.io/omniperf/profiling.html#workload-compilation) will be used in the following sections to demonstrate the use of Omniperf in MI GPU performance analysis. Unless otherwise noted, the performance analysis is done on the MI200 platform. + +### Features + +- All Omniperf built-in metrics. +- Baseline comparison across multiple runs. +- Metrics customization: pick a subset of built-in metrics or build your own profiling configuration. +- Kernel, gpu-id, dispatch-id filters. + +Run `omniperf analyze -h` for more details.
+ +### Recommended workflow + +1) To begin, generate a comprehensive analysis report with Omniperf CLI. +```shell-session +$ omniperf analyze -p workloads/vcopy/mi200/ + +-------- +Analyze +-------- + + +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20000.00 │ 20000.00 │ 20000.00 │ 100.00 │ +│ │ int) [clone .kd] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +1. System Info +╒══════════════════╤═══════════════════════════════════════════════╕ +│ │ Info │ +╞══════════════════╪═══════════════════════════════════════════════╡ +│ workload_name │ vcopy │ +├──────────────────┼───────────────────────────────────────────────┤ +│ command │ /home/colramos/vcopy 1048576 256 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ host_name │ sv-pdp-2 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ host_cpu │ AMD EPYC 7282 16-Core Processor │ +├──────────────────┼───────────────────────────────────────────────┤ +│ host_distro │ Ubuntu 20.04.3 LTS │ +├──────────────────┼───────────────────────────────────────────────┤ +│ host_kernel │ 5.15.0-43-generic │ +├──────────────────┼───────────────────────────────────────────────┤ +│ host_rocmver │ 5.2.1-79 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ date │ Fri Jan 20 11:22:20 2023 (CST) │ +├──────────────────┼───────────────────────────────────────────────┤ +│ gpu_soc │ gfx90a │ 
+├──────────────────┼───────────────────────────────────────────────┤ +│ numSE │ 8 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ numCU │ 104 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ numSIMD │ 4 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ waveSize │ 64 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ maxWavesPerCU │ 32 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ maxWorkgroupSize │ 1024 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ L1 │ 16 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ L2 │ 8192 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ sclk │ 1700 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ mclk │ 1600 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ cur_sclk │ 800 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ cur_mclk │ 1600 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ L2Banks │ 32 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ name │ mi200 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ numSQC │ 56 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ hbmBW │ 1638.4 │ +├──────────────────┼───────────────────────────────────────────────┤ +│ ip_blocks │ roofline|SQ|LDS|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF │ +╘══════════════════╧═══════════════════════════════════════════════╛ + + +-------------------------------------------------------------------------------- +2. System Speed-of-Light +.... +``` + 2. 
Use `--list-metrics` to generate a list of availible metrics for inspection + ```shell-session +$ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a +╒═════════╤═════════════════════════════╕ +│ │ Metric │ +╞═════════╪═════════════════════════════╡ +│ 0 │ Top Stat │ +├─────────┼─────────────────────────────┤ +│ 1 │ System Info │ +├─────────┼─────────────────────────────┤ +│ 2.1.0 │ VALU_FLOPs │ +├─────────┼─────────────────────────────┤ +│ 2.1.1 │ VALU_IOPs │ +├─────────┼─────────────────────────────┤ +│ 2.1.2 │ MFMA_FLOPs_(BF16) │ +├─────────┼─────────────────────────────┤ +│ 2.1.3 │ MFMA_FLOPs_(F16) │ +├─────────┼─────────────────────────────┤ +│ 2.1.4 │ MFMA_FLOPs_(F32) │ +├─────────┼─────────────────────────────┤ +│ 2.1.5 │ MFMA_FLOPs_(F64) │ +├─────────┼─────────────────────────────┤ +│ 2.1.6 │ MFMA_IOPs_(Int8) │ +├─────────┼─────────────────────────────┤ +│ 2.1.7 │ Active_CUs │ +├─────────┼─────────────────────────────┤ +│ 2.1.8 │ SALU_Util │ +├─────────┼─────────────────────────────┤ +│ 2.1.9 │ VALU_Util │ +├─────────┼─────────────────────────────┤ +│ 2.1.10 │ MFMA_Util │ +├─────────┼─────────────────────────────┤ +│ 2.1.11 │ VALU_Active_Threads/Wave │ +├─────────┼─────────────────────────────┤ +│ 2.1.12 │ IPC_-_Issue │ +├─────────┼─────────────────────────────┤ +│ 2.1.13 │ LDS_BW │ +├─────────┼─────────────────────────────┤ +│ 2.1.14 │ LDS_Bank_Conflict │ +├─────────┼─────────────────────────────┤ +│ 2.1.15 │ Instr_Cache_Hit_Rate │ +├─────────┼─────────────────────────────┤ +│ 2.1.16 │ Instr_Cache_BW │ +├─────────┼─────────────────────────────┤ +│ 2.1.17 │ Scalar_L1D_Cache_Hit_Rate │ +├─────────┼─────────────────────────────┤ +│ 2.1.18 │ Scalar_L1D_Cache_BW │ +├─────────┼─────────────────────────────┤ +│ 2.1.19 │ Vector_L1D_Cache_Hit_Rate │ +├─────────┼─────────────────────────────┤ +│ 2.1.20 │ Vector_L1D_Cache_BW │ +├─────────┼─────────────────────────────┤ +│ 2.1.21 │ L2_Cache_Hit_Rate │ +├─────────┼─────────────────────────────┤ +│ 2.1.22 │ 
L2-Fabric_Read_BW │ +├─────────┼─────────────────────────────┤ +│ 2.1.23 │ L2-Fabric_Write_BW │ +├─────────┼─────────────────────────────┤ +│ 2.1.24 │ L2-Fabric_Read_Latency │ +├─────────┼─────────────────────────────┤ +│ 2.1.25 │ L2-Fabric_Write_Latency │ +├─────────┼─────────────────────────────┤ +... + ``` + 2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/ROCm/omniperf/blob/amd-mainline/src/rocprof_compute_analyze/configs/panel_config_template.yaml). Below shows how to generate a report containing only metric 2 (a.k.a. System Speed-of-Light). +```shell-session +$ omniperf analyze -p workloads/vcopy/mi200/ -b 2 +-------- +Analyze +-------- + +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20000.00 │ 20000.00 │ 20000.00 │ 100.00 │ +│ │ int) [clone .kd] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +2. 
System Speed-of-Light +╒═════════╤═══════════════════════════╤═══════════════════════╤══════════════════╤════════════════════╤════════════════════════╕ +│ Index │ Metric │ Value │ Unit │ Peak │ PoP │ +╞═════════╪═══════════════════════════╪═══════════════════════╪══════════════════╪════════════════════╪════════════════════════╡ +│ 2.1.0 │ VALU FLOPs │ 0.0 │ Gflop │ 22630.4 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.1 │ VALU IOPs │ 367.0016 │ Giop │ 22630.4 │ 1.6217194570135745 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.2 │ MFMA FLOPs (BF16) │ 0.0 │ Gflop │ 90521.6 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.3 │ MFMA FLOPs (F16) │ 0.0 │ Gflop │ 181043.2 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.4 │ MFMA FLOPs (F32) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.5 │ MFMA FLOPs (F64) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.6 │ MFMA IOPs (Int8) │ 0.0 │ Giop │ 181043.2 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.7 │ Active CUs │ 74 │ Cus │ 104 │ 71.15384615384616 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.8 │ SALU Util │ 4.016057506716307 │ Pct │ 100 │ 4.016057506716307 │ 
+├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.9 │ VALU Util │ 5.737225009594725 │ Pct │ 100 │ 5.737225009594725 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.10 │ MFMA Util │ 0.0 │ Pct │ 100 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.11 │ VALU Active Threads/Wave │ 64.0 │ Threads │ 64 │ 100.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.12 │ IPC - Issue │ 1.0 │ Instr/cycle │ 5 │ 20.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.13 │ LDS BW │ 0.0 │ Gb/sec │ 22630.4 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.14 │ LDS Bank Conflict │ │ Conflicts/access │ 32 │ │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.15 │ Instr Cache Hit Rate │ 99.91306912556854 │ Pct │ 100 │ 99.91306912556854 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.16 │ Instr Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.17 │ Scalar L1D Cache Hit Rate │ 99.81986908342313 │ Pct │ 100 │ 99.81986908342313 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.18 │ Scalar L1D Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │ 
+├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.19 │ Vector L1D Cache Hit Rate │ 50.0 │ Pct │ 100 │ 50.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.20 │ Vector L1D Cache BW │ 1677.7216 │ Gb/s │ 11315.199999999999 │ 14.82714932126697 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.21 │ L2 Cache Hit Rate │ 35.55067615693325 │ Pct │ 100 │ 35.55067615693325 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.22 │ L2-Fabric Read BW │ 419.8496 │ Gb/s │ 1638.4 │ 25.6255859375 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.23 │ L2-Fabric Write BW │ 293.9456 │ Gb/s │ 1638.4 │ 17.941015625 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.24 │ L2-Fabric Read Latency │ 256.6482321288385 │ Cycles │ │ │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.25 │ L2-Fabric Write Latency │ 317.2264255699014 │ Cycles │ │ │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.26 │ Wave Occupancy │ 1821.723057333852 │ Wavefronts │ 3328 │ 54.73927455931046 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.27 │ Instr Fetch BW │ 4.174722306564298e-08 │ Gb/s │ 3046.4 │ 1.3703789084047721e-09 │ 
+├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.28 │ Instr Fetch Latency │ 21.729248046875 │ Cycles │ │ │ +╘═════════╧═══════════════════════════╧═══════════════════════╧══════════════════╧════════════════════╧════════════════════════╛ +``` +> **Note:** Some cells may be blank indicating a missing/unavailable hardware counter or NULL value + +3. Optimize the application, iterate, and re-profile to inspect performance changes. +4. Redo a comprehensive analysis with Omniperf CLI at any milestone or at the end. + +### Demo + +- Single run + ```shell + $ omniperf analyze -p workloads/vcopy/mi200/ + ``` + +- List top kernels + ```shell + $ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels + ``` + +- List metrics + + ```shell + $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a + ``` + +- Customized profiling "System Speed-of-Light" and "CS_Busy" only + + ```shell + $ omniperf analyze -p workloads/vcopy/mi200/ -b 2 5.1.0 + ``` + + > Note: Users can filter a single metric or the whole IP block by its id. In this case, 2 is the id for "System Speed-of-Light" and 5.1.0 is the id for metric "GPU Busy Cycles". + +- Filter kernels + + First, list the top kernels in your application using `--list-kernels`. + ```shell-session + $ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels + + -------- + Analyze + -------- + + + -------------------------------------------------------------------------------- + Detected Kernels + ╒════╤══════════════════════════════════════════════════════════╕ + │ │ KernelName │ + ╞════╪══════════════════════════════════════════════════════════╡ + │ 0 │ vecCopy(double*, double*, double*, int, int) [clone .kd] │ + ╘════╧══════════════════════════════════════════════════════════╛ + + ``` + + Second, select the index of the kernel you'd like to filter (i.e. __vecCopy(double*, double*, double*, int, int) [clone .kd]__ at index __0__).
Then, use this index to apply the filter via `-k/--kernels`. + + ```shell-session + $ omniperf analyze -p workloads/vcopy/mi200/ -k 0 + + -------- + Analyze + -------- + + + -------------------------------------------------------------------------------- + 0. Top Stat + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╤═════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ S │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╪═════╡ + │ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20800.00 │ 20800.00 │ 20800.00 │ 100.00 │ * │ + │ │ int) [clone .kd] │ │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╧═════╛ + ... ... + ``` + + > Note: You'll see your filtered kernel(s) indicated by an asterisk in the Top Stats table + + +- Baseline comparison + + ```shell + omniperf analyze -p workload1/path/ -p workload2/path/ + ``` + > Note: You can also apply different filters to each workload. + + OR + ```shell + omniperf analyze -p workload1/path/ -k 0 -p workload2/path/ -k 1 + ``` + +## GUI Analysis + +### Web-based GUI + +#### Features + +Omniperf's standalone GUI analyzer is a lightweight web page that can +be generated directly from the command-line. This option is provided +as an alternative for users wanting to explore profiling results +graphically, but without the additional setup requirements or +server-side overhead of Omniperf's detailed [Grafana +interface](https://rocm.github.io/omniperf/analysis.html#grafana-based-gui) +option. The standalone GUI analyzer is provided as a simple +[Flask](https://flask.palletsprojects.com/en/2.2.x/) application +allowing users to view results from within a web browser. + +```{admonition} Port forwarding + +Note that the standalone GUI analyzer publishes a web interface on port 8050 by default.
+On production HPC systems where profiling jobs run +under the auspices of a resource manager, additional SSH tunneling +between the desired web browser host (e.g. login node or remote workstation) and compute host may be +required. Alternatively, users may find it more convenient to download +profiled workloads to perform analysis on their local system. + +See [FAQ](https://rocm.github.io/omniperf/faq.html) for more details on SSH tunneling. +``` + +#### Usage + +To launch the standalone GUI, include the `--gui` flag with your desired analysis command. For example: + +```shell-session +$ omniperf analyze -p workloads/vcopy/mi200/ --gui + +-------- +Analyze +-------- + +Dash is running on http://0.0.0.0:8050/ + + * Serving Flask app 'rocprof_compute_analyze.rocprof_compute_analyze' (lazy loading) + * Environment: production + WARNING: This is a development server. Do not use it in a production deployment. + Use a production WSGI server instead. + * Debug mode: off + * Running on all addresses (0.0.0.0) + WARNING: This is a development server. Do not use it in a production deployment. + * Running on http://127.0.0.1:8050 + * Running on http://10.228.32.139:8050 (Press CTRL+C to quit) +``` + +At this point, users can then launch their web browser of choice and +go to http://localhost:8050/ to see an analysis page. + + + +![Standalone GUI Homepage](images/standalone_gui.png) + +```{tip} +To launch the web application on a port other than 8050, include an optional port argument: +`--gui <port>` +``` + +When no filters are applied, users will see five basic sections derived from their application's profiling data: + +1. Memory Chart Analysis +2. Empirical Roofline Analysis +3. Top Stats (Top Kernel Statistics) +4. System Info +5. System Speed-of-Light + +To dive deeper, use the top drop down menus to isolate particular +kernel(s) or dispatch(es). You will then see the web page update with +metrics specific to the filter you've applied.
+ +Once you have applied a filter, you will also see several additional +sections become available with detailed metrics specific to that area +of AMD hardware. These detailed sections mirror the data displayed in +Omniperf's [Grafana +interface](https://rocm.github.io/omniperf/analysis.html#grafana-based-gui). + +### Grafana-based GUI + +#### Features +The Omniperf Grafana GUI Analyzer supports the following features to facilitate MI GPU performance profiling and analysis: + +- System and IP-Block Speed-of-Light (SOL) +- Multiple normalization options, including per-cycle, per-wave, per-kernel and per-second. +- Baseline comparisons +- Regex based Dispatch ID filtering +- Roofline Analysis +- Detailed per IP Block performance counters and metrics + - CPC/CPF + - SPI + - SQ + - SQC + - TA/TD + - TCP + - TCC (both aggregated and per-channel perf info) + +##### Speed-of-Light +Speed-of-light panels are provided at both the system and per IP block level to help diagnose performance bottlenecks. The performance numbers of the workload under testing are compared to the theoretical maximum, (e.g. floating point operations, bandwidth, cache hit rate, etc.), to indicate the available room to further utilize the hardware capability. + +##### Multi Normalization + +Multiple performance number normalizations are provided to allow performance inspection within both HW and SW context. The following normalizations are permitted: +- per cycle +- per wave +- per kernel +- per second + +##### Baseline Comparison +Omniperf enables baseline comparison to allow checking A/B effect. The current release limits the baseline comparison to the same SoC. Cross comparison between SoCs is in development.
+ +For both the Current Workload and the Baseline Workload, one can independently set up the following filters to allow fine-grained comparisons: +- Workload Name +- GPU ID filtering (multi selection) +- Kernel Name filtering (multi selection) +- Dispatch ID filtering (Regex filtering) +- Omniperf Panels (multi selection) + +##### Regex based Dispatch ID filtering +This release enables regex based dispatch ID filtering to flexibly choose the kernel invocations. One may refer to [Regex Numeric Range Generator](https://3widgets.com/), to generate typical number ranges. + +For example, if one wants to inspect Dispatch Range from 17 to 48, inclusive, the corresponding regex is: **(1[7-9]|[23]\d|4[0-8])**. The generated expression can be copied over for filtering. + +##### Incremental Profiling +Omniperf supports incremental profiling to significantly speed up performance analysis. + +> Refer to [*IP Block profiling*](https://rocm.github.io/omniperf/profiling.html#ip-block-profiling) section for this command. + +By default, the entire application is profiled to collect perfmon counters for all IP blocks, giving a system level view of where the workload stands in terms of performance optimization opportunities and bottlenecks. + +After that one may focus on only a few IP blocks, (e.g., L1 Cache or LDS) to closely check the effect of software optimizations, without performing application replay for all other IP Blocks. This saves lots of compute time. In addition, the prior profiling results for other IP blocks are not overwritten. Instead, they can be merged during the import to piece together the system view. + +##### Color Coding +The uniform color coding is applied to most visualizations (bars, table, diagrams etc). Typically, Yellow color means over 50%, while Red color means over 90%, for easy inspection.
+ +##### Global Variables and Configurations + +![Grafana GUI Global Variables](images/global_variables.png) + +#### Grafana GUI Import +The omniperf database `--import` option imports the raw profiling data to Grafana's backend MongoDB database. This step is only required for Grafana GUI based performance analysis. + +Default username and password for MongoDB (to be used in database mode) are as follows: + + - Username: **temp** + - Password: **temp123** + +Each workload is imported to a separate database with the following naming convention: + + omniperf_<team>_<database>_<soc> + +e.g., omniperf_asw_vcopy_mi200. + +When using database mode, be sure to tailor the connection options to the machine hosting your [server-side instance](./installation.md). Below is the sample command to import the *vcopy* profiling data, let's assume our host machine is called "dummybox". + +```shell-session +$ omniperf database --help +ROC Profiler: /usr/bin/rocprof + +usage: + +omniperf database [connection options] + + + +------------------------------------------------------------------------------- + +Examples: + + omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/ + + omniperf database --remove -H pavii1 -u temp -w omniperf_asw_sample_mi200 + +------------------------------------------------------------------------------- + + + +Help: + -h, --help show this help message and exit + +General Options: + -v, --version show program's version number and exit + -V, --verbose Increase output verbosity + +Interaction Type: + -i, --import Import workload to Omniperf DB + -r, --remove Remove a workload from Omniperf DB + +Connection Options: + -H , --host Name or IP address of the server host. + -P , --port TCP/IP Port. (DEFAULT: 27018) + -u , --username Username for authentication. + -p , --password The user's password. (will be requested later if it's not set) + -t , --team Specify Team prefix.
+ -w , --workload Specify name of workload (to remove) or path to workload (to import) + -k , --kernelVerbose Specify Kernel Name verbose level 1-5. + Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5) +``` + +**omniperf import for vcopy:** +```shell-session +$ omniperf database --import -H dummybox -u temp -t asw -w workloads/vcopy/mi200/ +ROC Profiler: /usr/bin/rocprof + +-------- +Import Profiling Results +-------- + +Pulling data from /home/amd/xlu/test/workloads/vcopy/mi200 +The directory exists +Found sysinfo file +KernelName shortening enabled +Kernel name verbose level: 2 +Password: +Password recieved +-- Conversion & Upload in Progress -- + 0%| | 0/11 [00:00 Note: The Memory Chart Analysis support multiple normalizations. Due to the space limit, all transactions, when normalized to per-sec, default to unit of Billion transactions per second. + +![Memory Chart Analysis](images/Memory_chart_analysis.png) + +##### Roofline Analysis +![Roofline Analysis](images/Roofline_analysis.png) +##### Command Processor +![Command Processor](images/Command_processor.png) +##### Shader Processing Input (SPI) +![Shader Processing Input](images/Shader_processing_input.png) +##### Wavefront Launch +![Wavefront Launch](images/Wavefront_launch.png) + +##### Compute Unit - Instruction Mix +###### Instruction Mix +![Instruction Mix](images/Instruction_mix.png) +###### VALU Arithmetic Instruction Mix +![VALU Arithmetic Instruction Mix](images/VALU_arithmetic_instruction_mix.png) +###### MFMA Arithmetic Instruction Mix +![MFMA Arithmetic Instruction Mix](images/MFMA_arithmetic_instruction_mix.png) +###### VMEM Arithmetic Instruction Mix +![VMEM Arithmetic Instruction Mix](images/VMEM_arithmetic_intensity_mix.png) + +##### Compute Unit - Compute Pipeline +###### Speed-of-Light +![Speed-of-Light](images/Comp_pipe_sol.png) +###### Compute Pipeline Stats +![Compute Pipeline Stats](images/Compute_pipeline_stats.png) +###### Arithmetic Operations +![Arithmetic 
Operations](images/Arithmetic_operations.png) +###### Memory Latencies +![Memory Latencies](images/Memory_latencies.png) + +##### Local Data Share (LDS) +###### Speed-of-Light +![Speed-of-Light](images/LDS_sol.png) +###### LDS Stats +![LDS Stats](images/LDS_stats.png) + +##### Instruction Cache +###### Speed-of-Light +![Speed-of-Light](images/Instruc_cache_sol.png) +###### Instruction Cache Stats +![Instruction Cache Stats](images/Instruction_cache_stats.png) + +##### Scalar L1D Cache +###### Speed-of-Light +![Speed-of-Light](images/L1D_sol.png) +###### Constant Cache Stats +![Constant Cache Stats](images/Constant_cache_stats.png) +###### Constant Cache - L2 Interface +![Constant Cache - L2 Interface](images/Constant_cache_l2_interface.png) + +##### Texture Address and Texture Data +###### Texture Address (TA) +![Texture Address](images/Texture_address.png) +###### Texture Data (TD) +![Texture Data](images/Texture_data.png) + +##### Vector L1D Cache +###### Speed-of-Light +![Speed-of-Light](images/Vec_L1D_cache_sol.png) +###### Vector L1D Cache Accesses +![Vector L1D Cache Accesses](images/Vec_L1D_cache_accesses.png) +###### L1 Cache Stalls +![L1 Cache Stalls](images/L1_cache_stalls.png) +###### L1 - L2 Transactions +![L1 - L2 Transactions](images/L1_l2_transactions.png) +###### L1 - UTCL1 Interface Stats +![L1 - UTCL1 Interface Stats](images/L1_utcl1_transactions.png) + +##### L2 Cache +###### Speed-of-Light +![Speed-of-Light](images/L2_cache_sol.png) +###### L2 Cache Accesses +![L2 Cache Accesses](images/L2_cache_accesses.png) +###### L2 - EA Transactions +![L2 - EA Transactions](images/L2_ea_transactions.png) +###### L2 - EA Stalls +![L2 - EA Stalls](images/L2_ea_stalls.png) + +##### L2 Cache Per Channel Performance +###### L1-L2 Transactions +![L1-L2 Transactions](images/L1_l2_transactions_per_channel.png) +###### L2-EA Transactions +![L2-EA Transactions](images/L2_ea_transactions_per_channel.png) +###### L2-EA Latencies +![L2-EA
Latencies](images/L2_ea_latencies_per_channel.png) +###### L2-EA Stalls +![L2-EA Stalls](images/L2_ea_stalls_per_channel.png) +###### L2-EA Write Stalls +![L2-EA Write Stalls](images/L2_ea_write_stalls_per_channel.png) +###### L2-EA Write Starvation +![L2-EA Write Starvation](images/L2_ea_write_starvation_per_channel.png) diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/conf.py b/projects/rocprofiler-compute/docs/archive/docs-1.x/conf.py new file mode 100644 index 0000000000..5f3e385c18 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/conf.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +import subprocess as sp +import sys + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use str(Path().absolute().resolve()) to make it absolute, like shown here. 
+# +from pathlib import Path + +sys.path.insert(0, str(Path("..").absolute().resolve())) + +repo_version = "unknown" +# Determine short version by file in repo +if Path("./VERSION").is_file(): + with open("./VERSION") as f: + repo_version = f.readline().strip() + + +def install(package): + sp.call([sys.executable, "-m", "pip", "install", package]) + + +# -- Project information ----------------------------------------------------- + +project = "Omniperf" +copyright = "2022, Audacious Software Group" +author = "Audacious Software Group" + +# The short X.Y version +version = repo_version +# The full version, including alpha/beta/rc tags +release = "" + +# -- General configuration --------------------------------------------------- + +install("sphinx_rtd_theme") + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.githubpages", + "myst_parser", +] + +myst_heading_anchors = 2 +# enable replacement of (tm) & friends +myst_enable_extensions = ["replacements"] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +source_suffix = { + ".rst": "restructuredtext", + ".txt": "markdown", + ".md": "markdown", +} + +from recommonmark.parser import CommonMarkParser + +source_parsers = {".md": CommonMarkParser} + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + +# options for latex output +latex_engine = "lualatex" +latex_show_urls = "footnote" + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "Omniperfdoc" + +html_theme_options = { + "analytics_id": "G-C5DYLCE9ED", # Provided by Google in your dashboard + "analytics_anonymize_ip": False, + "logo_only": False, + "display_version": True, + "prev_next_buttons_location": "bottom", + "style_external_links": False, + "vcs_pageview_mode": "", + # 'style_nav_header_background': 'white', + # Toc options + "collapse_navigation": True, + "sticky_navigation": True, + "navigation_depth": 4, + "includehidden": True, + "titles_only": False, +} + +from pygments.styles import get_all_styles + +# The name of the Pygments (syntax highlighting) style to use. 
+styles = list(get_all_styles()) +preferences = ("emacs", "pastie", "colorful") +for pref in preferences: + if pref in styles: + pygments_style = pref + break + +from recommonmark.transform import AutoStructify + + +# app setup hook +def setup(app): + app.add_config_value( + "recommonmark_config", + { + "auto_toc_tree_section": "Contents", + "enable_eval_rst": True, + "enable_auto_doc_ref": False, + }, + True, + ) + app.add_transform(AutoStructify) + app.add_config_value("docstring_replacements", {}, True) + app.connect("source-read", replaceString) + + +# function to replace version string throughout documentation + + +def replaceString(app, docname, source): + result = source[0] + for key in app.config.docstring_replacements: + result = result.replace(key, app.config.docstring_replacements[key]) + source[0] = result + + +docstring_replacements = {"{__VERSION__}": version} diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/faq.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/faq.md new file mode 100644 index 0000000000..87d2f33c15 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/faq.md @@ -0,0 +1,55 @@ +# FAQ + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +**1. How do I export profiling data I've already generated using Omniperf?** + +In order to interact with the Grafana GUI you must sync data with the MongoDB backend. This interaction is done through ***database*** mode. + +Simply pass the directory of your desired workload like so, +```shell +$ omniperf database --import -w -H -u -t +``` +**2. python ast error: 'Constant' object has no attribute 'kind'** + +This comes from a bug in the default astunparse 1.6.3 with python 3.8. Seems good with python 3.7 and 3.9. + +Workaround: +```shell +$ pip3 uninstall astunparse +$ pip3 install astunparse +``` + +**3. tabulate doesn't print properly** +Workaround: +```shell +$ export LC_ALL=C.UTF-8 +$ export LANG=C.UTF-8 +``` + +**4. How can I SSH Tunnel in MobaXterm?** + +1.
Open MobaXterm +2. In the top ribbon, select `Tunneling` +![Tunnel Button](images/tunnel_demo1.png) +This pop up will appear +![Pop up](images/tunnel_demo2.png) +3. Press `New SSH tunnel` +![Pop up](images/tunnel_demo3.png) +4. Configure tunnel accordingly + + Local clients + - Forwarded Port: [PORT] + + Remote Server + - Remote Server: localhost + - Remote Port: [PORT] + + SSH Server + - SSH server: Name of the server one is connecting to + - SSH login: Username to login to the server + - SSH port: 22 diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md new file mode 100644 index 0000000000..1ee28a496a --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md @@ -0,0 +1,93 @@ +# Getting Started + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +## Quickstart + +1. **Launch & Profile the target application with the command line profiler** + + The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or IP blocks. If not specified, Omniperf will default to collecting all available counters for all kernels/dispatches launched by the user's executable. + + To collect the default set of data for all kernels in the target application, launch, e.g.: + ```shell + $ omniperf profile -n vcopy_data -- ./vcopy 1048576 256 + ``` + The app runs, each kernel is launched, and profiling results are generated. By default, results are written to (e.g.,) ./workloads/vcopy_data (configurable via the `-n` argument). To collect all requested profile information, it may be required to replay kernels multiple times. + +2. **Customize data collection** + + Options are available to specify for which kernels/metrics data should be collected. 
+ Note that filtering can be applied either in the profiling or analysis stage, however filtering during profiling collection will often speed up your overall profiling run time. + + Some common filters include: + + - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID + - `-b`/`--ipblocks` collects metrics for only the specified (one or more) IP Blocks. + + To view available metrics by IP Block you can use the `--list-metrics` argument to view a list of all available metrics organized by IP Block. + ```shell + $ omniperf analyze --list-metrics + ``` + +3. **Analyze at the command line** + + After generating a local output folder (./workloads/\<name>), the command line tool can also be used to quickly interface with profiling results. View different metrics derived from your profiled results and get immediate access to all metrics organized by IP block. + + If no kernel, dispatch, or ipblock filters are applied at this stage, analysis will be reflective of the entirety of the profiling data. + + To interact with profiling results from a different session, users just provide the workload path. `-p`/`--path` enables users to analyze existing profiling data in the Omniperf CLI. + +4. **Analyze in the Grafana GUI** + + To conduct a more in-depth analysis of profiling results we recommend users utilize the Omniperf Grafana GUI. To interact with profiling results, users must import their data to the MongoDB instance included in the Omniperf dockerfile. + + To interact with Grafana GUI data, stored in the Omniperf DB, users can enter ***database*** mode. For example: + ```shell + $ omniperf database --import [CONNECTION OPTIONS] + ``` + +## Usage + +### Modes +Modes change the fundamental behavior of the Omniperf command line tool. Depending on which mode is chosen, different command line options become available.
+ +- **Profile**: Target application is launched on the local system utilizing AMD’s [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). Depending on the profiling options chosen, selected kernels, dispatches, and/or IP Blocks in the application are profiled and results are stored locally in an output folder (./workloads/\<name>). + + ```shell + $ omniperf profile --help + ``` + +- **Analyze**: Profiling data from `-p`/`--path` directory is loaded into the Omniperf CLI analyzer where users have immediate access to profiling results and generated metrics. Metrics are quickly generated from the entirety of your profiled application or a subset you’ve identified through the Omniperf CLI analysis filters. + + To generate a lightweight GUI interface, users can add the `--gui` flag to their analysis command. + + This mode is designed to be a middle ground to the highly detailed Omniperf Grafana GUI and is great for users who want immediate access to an IP Block they’re already familiar with. + + ```shell + $ omniperf analyze --help + ``` + +- **Database**: Our detailed Grafana GUI is built on a MongoDB database. `--import` profiling results to the DB to interact with the workload in Grafana or `--remove` the workload from the DB. + + Connection options will need to be specified. See the [*Grafana + Analysis*](https://rocm.github.io/omniperf/analysis.html#grafana-gui-import) import section + for more details on this.
+ + ```shell + $ omniperf database --help + ``` + +## Basic Operations + +Operation | Mode | Required Arguments +:--|:--|:-- +Profile a workload | profile | `--name`, `-- <profile_cmd>` +Standalone roofline analysis | profile | `--name`, `--roof-only`, `-- <profile_cmd>` +Import a workload to database | database | `--import`, `--host`, `--username`, `--workload`, `--team` +Remove a workload from database | database | `--remove`, `--host`, `--username`, `--workload`, `--team` +Launch standalone GUI from CLI | analyze | `--path`, `--gui` +Interact with profiling results from CLI | analyze | `--path` diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/high_level_design.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/high_level_design.md new file mode 100644 index 0000000000..be46a47531 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/high_level_design.md @@ -0,0 +1,20 @@ +# High Level Design + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +The [Omniperf](https://github.com/ROCm/omniperf) Tool is architecturally composed of three major components, as shown in the following figure. + +- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [rocProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html). The counters are stored in comma-separated values (CSV) files for further analysis. A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. The roofline model is not available on earlier accelerators. + +- **Omniperf Grafana Analyzer**: + - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. Compatibility of previously generated data between Omniperf versions is not necessarily guaranteed. + - *Grafana GUI Analyzer*: A Grafana dashboard is designed to retrieve the raw counters info from the backend database.
It also creates the relevant performance metrics and visualization. +- **Omniperf Standalone GUI Analyzer**: A standalone GUI is provided to enable performance analysis without importing data into the backend database. + +![Omniperf Architectual Diagram](images/omniperf_server_vs_client_install.png) + +> Note: To learn more about the client vs. server model of Omniperf and our install process please see the [Deployment section](./installation.md) of the docs. diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Arithmetic_operations.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Arithmetic_operations.png new file mode 100644 index 0000000000..e1be12851f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Arithmetic_operations.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Command_processor.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Command_processor.png new file mode 100644 index 0000000000..bb0342fa44 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Command_processor.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Comp_pipe_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Comp_pipe_sol.png new file mode 100644 index 0000000000..47bb08d9fe Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Comp_pipe_sol.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Compute_pipeline_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Compute_pipeline_stats.png new file mode 100644 index 0000000000..95ad3fca81 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Compute_pipeline_stats.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_l2_interface.png 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_l2_interface.png new file mode 100644 index 0000000000..5e0f5ba8b9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_l2_interface.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_stats.png new file mode 100644 index 0000000000..6790cf6575 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_stats.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png new file mode 100644 index 0000000000..811bf99692 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruc_cache_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruc_cache_sol.png new file mode 100644 index 0000000000..ba50e4aaf4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruc_cache_sol.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_cache_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_cache_stats.png new file mode 100644 index 0000000000..fdc9c750cc Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_cache_stats.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_mix.png new file mode 100644 index 0000000000..22cce1306c Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_mix.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Kernel_time_histogram.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Kernel_time_histogram.png new file mode 100644 index 0000000000..8ec0fd83ba Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Kernel_time_histogram.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1D_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1D_sol.png new file mode 100644 index 0000000000..94999672d1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1D_sol.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_cache_stalls.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_cache_stalls.png new file mode 100644 index 0000000000..1c9df98bb5 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_cache_stalls.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions.png new file mode 100644 index 0000000000..85b40eb4a4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions_per_channel.png new file mode 100644 index 0000000000..7b839ab0fe Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_utcl1_transactions.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_utcl1_transactions.png new 
file mode 100644 index 0000000000..91928cf75b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_utcl1_transactions.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_accesses.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_accesses.png new file mode 100644 index 0000000000..f6c8059385 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_accesses.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_sol.png new file mode 100644 index 0000000000..6530f63a6b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_sol.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_latencies_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_latencies_per_channel.png new file mode 100644 index 0000000000..a0b3471974 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_latencies_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls.png new file mode 100644 index 0000000000..bf7b4a59a5 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls_per_channel.png new file mode 100644 index 0000000000..ac1c5dffb1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions.png 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions.png new file mode 100644 index 0000000000..d6da29b625 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions_per_channel.png new file mode 100644 index 0000000000..669eb444d8 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png new file mode 100644 index 0000000000..d5a1c2c072 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png new file mode 100644 index 0000000000..49d584621d Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_sol.png new file mode 100644 index 0000000000..b4dfe95df6 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_sol.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_stats.png new file mode 100644 index 0000000000..426f45f850 Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_stats.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png new file mode 100644 index 0000000000..5bab0f3940 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_chart_analysis.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_chart_analysis.png new file mode 100644 index 0000000000..977733f07e Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_chart_analysis.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_latencies.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_latencies.png new file mode 100644 index 0000000000..3b97d72e0d Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_latencies.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Roofline_analysis.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Roofline_analysis.png new file mode 100644 index 0000000000..36efd2ea77 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Roofline_analysis.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Shader_processing_input.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Shader_processing_input.png new file mode 100644 index 0000000000..169ffed021 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Shader_processing_input.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_info_panel.png 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_info_panel.png new file mode 100644 index 0000000000..245f05aeb5 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_info_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_speed_of_light.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_speed_of_light.png new file mode 100644 index 0000000000..d677331279 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_speed_of_light.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_address.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_address.png new file mode 100644 index 0000000000..7370d3ac20 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_address.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_data.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_data.png new file mode 100644 index 0000000000..3267d6e85c Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_data.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_dispatches.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_dispatches.png new file mode 100644 index 0000000000..31d13a0a2f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_dispatches.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_kernels.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_kernels.png new file mode 100644 index 0000000000..17b8ef7da2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_kernels.png differ diff --git 
a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png new file mode 100644 index 0000000000..0a3597283b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png new file mode 100644 index 0000000000..112b14da32 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_accesses.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_accesses.png new file mode 100644 index 0000000000..4c5391683d Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_accesses.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_sol.png new file mode 100644 index 0000000000..acec5e1b36 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_sol.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Wavefront_launch.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Wavefront_launch.png new file mode 100644 index 0000000000..d0e587f282 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Wavefront_launch.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_config.jpg b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_config.jpg new file mode 100644 index 
0000000000..4210d9036b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_config.jpg differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_settings.jpg b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_settings.jpg new file mode 100644 index 0000000000..f472362544 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_settings.jpg differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/global_variables.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/global_variables.png new file mode 100644 index 0000000000..87f49b5e14 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/global_variables.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_welcome.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_welcome.png new file mode 100644 index 0000000000..e564c0a389 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_welcome.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_workload_selection.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_workload_selection.png new file mode 100644 index 0000000000..3ecdc35e72 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_workload_selection.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/import_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/import_dashboard.png new file mode 100644 index 0000000000..29be7ea584 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/import_dashboard.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/install_decision_tree.png 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/install_decision_tree.png new file mode 100644 index 0000000000..1c62fba87b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/install_decision_tree.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_architecture.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_architecture.png new file mode 100644 index 0000000000..966ac2d608 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_architecture.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_server_vs_client_install.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_server_vs_client_install.png new file mode 100644 index 0000000000..8c43dba9e2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_server_vs_client_install.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/opening_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/opening_dashboard.png new file mode 100644 index 0000000000..5e6c7ea625 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/opening_dashboard.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/sample-roof-plot.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/sample-roof-plot.png new file mode 100644 index 0000000000..2deaba7ad2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/sample-roof-plot.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/standalone_gui.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/standalone_gui.png new file mode 100644 index 0000000000..a8abd81694 Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/standalone_gui.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo1.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo1.png new file mode 100644 index 0000000000..bda64883c4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo1.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo2.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo2.png new file mode 100644 index 0000000000..8b2d258521 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo2.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo3.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo3.png new file mode 100644 index 0000000000..76cd7ed9a9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo3.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/index.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/index.md new file mode 100644 index 0000000000..0a8231be64 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/index.md @@ -0,0 +1,21 @@ +# Welcome to the [Omniperf](https://github.com/ROCm/omniperf) Documentation! + +```{warning} +This version of the documentation is archived and contains out-of-date information. +See [Omniperf documentation](https://rocm.docs.amd.com/projects/omniperf/en/latest/index.html) for the latest version. +``` + +```eval_rst +.. 
toctree:: + :glob: + :maxdepth: 4 + :caption: Table of Contents + + introduction + high_level_design + installation + getting_started + profiling + analysis + faq +``` diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/installation.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/installation.md new file mode 100644 index 0000000000..f1599b4c7c --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/installation.md @@ -0,0 +1,242 @@ +# Deployment + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +Omniperf is broken into two installation components: + +1. **Omniperf Client-side (_Required_)** + - Provides core application profiling capability + - Allows collection of performance counters, filtering by IP block, dispatch, kernel, etc + - CLI based analysis mode + - Stand alone web interface for importing analysis metrics +2. **Omniperf Server-side (_Optional_)** + - Mongo DB backend + Grafana instance + - Packaged in a Docker container for easy setup + +Determine what you need to install based on how you'd like to interact with Omniperf. See the decision tree below to help determine what installation is right for you. + +![Omniperf Installtion Decision Tree](images/install_decision_tree.png) + +--- + +## Client-side Installation + +Omniperf client-side requires the following basic software dependencies prior to usage: + +* Python (>=3.7) +* CMake (>= 3.19) +* ROCm (>= 5.2.0) + +In addition, Omniperf leverages a number of Python packages that are +documented in the top-level `requirements.txt` file. These must be +installed prior to Omniperf configuration. + +The recommended procedure for Omniperf usage is to install into a shared file system so that multiple users can access the final installation. The following steps illustrate how to install the necessary python dependencies using [pip](https://packaging.python.org/en/latest/) and Omniperf into a shared location controlled by the `INSTALL_DIR` environment variable. 
+ +```{admonition} Configuration variables +The following installation example leverages several +[CMake](https://cmake.org/cmake/help/latest/) project variables +defined as follows: +| Variable | Description | +| -------------------- | -------------------------------------------------------------------- | +| CMAKE_INSTALL_PREFIX | controls install path for Omniperf files | +| PYTHON_DEPS | provides optional path to resolve Python package dependencies | +| MOD_INSTALL_PATH | provides optional path for separate Omniperf modulefile installation | + +``` + +A typical install will begin by downloading the latest release tarball +available from the +[Releases](https://github.com/ROCm/omniperf/releases) section +of the Omniperf development site. From there, untar and descend into +the top-level directory as follows: + +```shell +$ tar xfz omniperf-v{__VERSION__}.tar.gz +$ cd omniperf-v{__VERSION__} +``` + +Next, install Python dependencies and complete the Omniperf configuration/install process as follows: + +```shell +# define top-level install path +$ export INSTALL_DIR= + +# install python deps +$ python3 -m pip install -t ${INSTALL_DIR}/python-libs -r requirements.txt + +# configure Omniperf for shared install +$ mkdir build +$ cd build +$ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/{__VERSION__} \ + -DPYTHON_DEPS=${INSTALL_DIR}/python-libs \ + -DMOD_INSTALL_PATH=${INSTALL_DIR}/modulefiles .. + +# install +$ make install +``` + +```{tip} +You may require `sudo` during the final install step if you +do not have write access to the chosen install path. +``` + + +After completing these steps, a successful top-level installation directory looks as follows: +```shell +$ ls $INSTALL_DIR +modulefiles {__VERSION__} python-libs +``` + +### Execution using modulefiles + +The installation process includes creation of an environment +modulefile for use with [Lmod](https://lmod.readthedocs.io). 
On +systems that support Lmod, a user can register the Omniperf modulefile +directory and setup their environment for execution of Omniperf as +follows: + + + +```shell +$ module use $INSTALL_DIR/modulefiles +$ module load omniperf +$ which omniperf +/opt/apps/omniperf/{__VERSION__}/bin/omniperf + +$ omniperf --version +ROC Profiler: /opt/rocm-5.1.0/bin/rocprof + +omniperf (v{__VERSION__}) +``` + +```{tip} Users relying on an Lmod Python module locally may wish to +customize the resulting Omniperf modulefile post-installation to +include additional module dependencies. +``` + +### Execution without modulefiles + +To use Omniperf without the companion modulefile, update your `PATH` +settings to enable access to the command-line binary. If you installed Python +dependencies in a shared location, update your `PYTHONPATH` config as well: + +```shell +export PATH=$INSTALL_DIR/{__VERSION__}/bin:$PATH +export PYTHONPATH=$INSTALL_DIR/python-libs +``` + +### rocProf + +Omniperf relies on a rocprof binary during the profiling +process. Normally the path to this binary will be detected +automatically, but it can also be overridden via the setting the +optional `ROCPROF` environment variable to the path of the binary the user +wishes to use instead. + + + + + +%%% ### Generate Packaging +%%% ```console +%%% cd build +%%% cpack -G STGZ +%%% cpack -G DEB -D CPACK_PACKAGING_INSTALL_PREFIX=/opt/omniperf +%%% cpack -G RPM -D CPACK_PACKAGING_INSTALL_PREFIX=/opt/omniperf +%%% ``` + +--- + +## Server-side Setup + +> Note: Server-side setup is not required to profile or analyze performance data from the CLI. It is provided as an additional mechanism to import performance data for examination within a detailed [Grafana](https://github.com/grafana/grafana) GUI. 
+ +Omniperf server-side requires the following basic software dependencies prior to usage: + +* [Docker Engine](https://docs.docker.com/engine/install/) + +The recommended process for enabling the server-side of Omniperf is to use the provided Docker file to build the Grafana and MongoDB instance. + +Once you've decided which machine you'd like to use to host the Grafana and MongoDB instance, please follow the set up instructions below. + +### 1) Install MongoDB Utils +Omniperf uses [mongoimport](https://www.mongodb.com/docs/database-tools/mongoimport/) to upload data to Grafana's backend database. Install for Ubuntu 20.04 is as follows: + +```bash +$ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb +$ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb +``` +> Installation instructions for alternative distributions can be found [here](https://www.mongodb.com/download-center/database-tools/releases/archive) + +### 2) Persistent Storage + +The user will also bind MongoDB to a directory on the host OS to create a local backup in case of a crash or reset. In the Docker world, this is known as "creating a persistent volume": + +```bash +$ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/ +$ sudo mkdir -p grafana-storage mongodb +$ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/grafana-storage --opt o=bind grafana-storage +$ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/mongodb --opt o=bind grafana-mongo-db +``` + +### 3) Build and Launch + +We're now ready to build our Docker file. Navigate to your Omniperf install directory to begin. +```bash +$ sudo docker-compose build +$ sudo docker-compose up -d +``` +> Note that TCP ports for Grafana (4000) and MongoDB (27017) in the docker container are mapped to 14000 and 27018, respectively, on the host side. 
+ +### 4) Setup Grafana Instance +Once you've launched your docker container, you should be able to reach Grafana at **http://\:14000**. The default login credentials for the first-time Grafana setup are: + +- Username: **admin** +- Password: **admin** + +![Grafana Welcome Page](images/grafana_welcome.png) + +MongoDB Datasource Configuration + +The MongoDB Datasource must be configured prior to the first-time use. Navigate to Grafana's Configuration page (shown below) to add the **Omniperf Data** connection. + +![Omniperf Datasource Config](images/datasource_config.jpg) + +Configure the following fields in the datasource settings: + +- HTTP URL: set to *http://localhost:3333* +- MongoDB URL: set to *mongodb://temp:temp123@\:27018/admin?authSource=admin* +- Database Name: set to *admin* + +After properly configuring these fields, click **Save & Test** (as shown below) to make sure your connection is successful. + +> Note: to avoid potential DNS issues, one may need to use the actual IP address for the host node in the MongoDB URL. + +![Datasource Settings](images/datasource_settings.jpg) + +Omniperf Dashboard Import + +From *Create* → *Import*, (as shown below) upload the dashboard file, `/dashboards/Omniperf_v{__VERSION__}_pub.json`, from the Omniperf tarball. + +Edit both the Dashboard Name and the Unique Identifier (UID) to uniquely identify the dashboard you will use. Click Import to finish the process. + +![Import Dashboard](images/import_dashboard.png) + +Using your dashboard + +Once you've imported a dashboard, you're ready to begin! Start by browsing available dashboards and selecting the dashboard you've just imported. + +![Opening your dashboard](images/opening_dashboard.png) + +Remember, you'll need to upload workload data to the DB backend before analyzing in your Grafana interface. We provide a detailed example of this in our [Analysis section](./analysis.md#grafana-gui-import). 
+ +After a workload has been successfully uploaded, you should be able to select it from the workload dropdown located at the top of your Grafana dashboard. + +![Selecting Grafana workload](images/grafana_workload_selection.png) + +For more information on how to use the Grafana interface for analysis, please see the [Grafana section](./analysis.md#grafana-based-gui) in the Analyze Mode tab. diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/introduction.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/introduction.md new file mode 100644 index 0000000000..5ba0e68fee --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/introduction.md @@ -0,0 +1,56 @@ +# Introduction + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +[Browse Omniperf source code on GitHub](https://github.com/ROCm/omniperf) + +## Scope + +MI Performance Profiler ([Omniperf](https://github.com/ROCm/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD Instinct (tm) Accelerators. It is currently built on top of the [rocProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html) to monitor hardware performance counters. The Omniperf tool primarily targets accelerators in the MI100 and MI200 families. Development is in progress to support MI300 and Radeon (tm) RDNA (tm) GPUs. + +## Features + +The Omniperf tool performs system profiling based on all available hardware counters for the target accelerator. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... + +Both command line analysis and GUI analysis are supported. 
+ +Detailed Feature List: +- MI100 support +- MI200 support +- Standalone GUI Analyzer +- Grafana/MongoDB GUI Analyzer +- Dispatch Filtering +- Kernel Filtering +- GPU ID Filtering +- Baseline Comparison +- Multi-Normalizations +- System Info Panel +- System Speed-of-Light Panel +- Kernel Statistic Panel +- Memory Chart Analysis Panel +- Roofline Analysis Panel (*Supported on MI200 only, SLES 15 SP3 or RHEL8*) +- Command Processor (CP) Panel +- Shader Processing Input (SPI) Panel +- Wavefront Launch Panel +- Compute Unit - Instruction Mix Panel +- Compute Unit - Pipeline Panel +- Local Data Share (LDS) Panel +- Instruction Cache Panel +- Scalar L1D Cache Panel +- Texture Addresser and Data Panel +- Vector L1D Cache Panel +- L2 Cache Panel +- L2 Cache (per-Channel) Panel + +## Compatible SOCs + +| Platform | Status | +| :------- | :------------- | +| Vega 20 (MI-50/60) | No | +| MI100 | Supported | +| MI200 | Supported | +| MI300 | In development | diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/make.bat b/projects/rocprofiler-compute/docs/archive/docs-1.x/make.bat new file mode 100644 index 0000000000..27f573b87a --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md new file mode 100644 index 0000000000..d3e6831a00 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md @@ -0,0 +1,425 @@ +# Profile Mode + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 5 +``` + +The [Omniperf](https://github.com/ROCm/omniperf) repository +includes source code for a sample GPU compute workload, +__vcopy.cpp__. A copy of this file is available in the `share/sample` +subdirectory after a normal Omniperf installation, or via the +`$OMNIPERF_SHARE/sample` directory when using the supplied modulefile. + +A compiled version of this workload is used throughout the following +sections to demonstrate the use of Omniperf in MI GPU performance +analysis. Unless otherwise noted, the performance analysis is done on +the MI200 platform. + +## Workload Compilation +**vcopy compilation:** +```shell-session +$ hipcc vcopy.cpp -o vcopy +$ ls +vcopy vcopy.cpp +$ ./vcopy 1048576 256 +Finished allocating vectors on the CPU +Finished allocating vectors on the GPU +Finished copying vectors to the GPU +sw thinks it moved 1.000000 KB per wave +Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384: +Launching the kernel on the GPU +Finished executing kernel +Finished copying the output vector from the GPU to the CPU +Releasing GPU memory +Releasing CPU memory +``` + +## Omniperf Profiling +The *omniperf* script, available through the [Omniperf](https://github.com/ROCm/omniperf) repository, is used to acquire all necessary perfmon data through analysis of compute workloads. 
+ +**omniperf help:** +```shell-session +$ omniperf profile --help +ROC Profiler: /usr/bin/rocprof + +usage: + +omniperf profile --name [profile options] [roofline options] -- + + + +------------------------------------------------------------------------------- + +Examples: + + omniperf profile -n vcopy_all -- ./vcopy 1048576 256 + + omniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256 + + omniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256 + + omniperf profile -n vcopy_disp -d 0 -- ./vcopy 1048576 256 + + omniperf profile -n vcopy_roof --roof-only -- ./vcopy 1048576 256 + +------------------------------------------------------------------------------- + + + +Help: + -h, --help show this help message and exit + +General Options: + -v, --version show program's version number and exit + -V, --verbose Increase output verbosity + +Profile Options: + -n , --name Assign a name to workload. + -p , --path Specify path to save workload. + (DEFAULT: /home/colramos/GitHub/omniperf/workloads/) + -k [ ...], --kernel [ ...] Kernel filtering. + -b [ ...], --ipblocks [ ...] IP block filtering: + SQ + SQC + TA + TD + TCP + TCC + SPI + CPC + CPF + -d [ ...], --dispatch [ ...] Dispatch ID filtering. + --no-roof Profile without collecting roofline data. + -- [ ...] Provide command for profiling after double dash. + +Standalone Roofline Options: + --roof-only Profile roofline data only. + --sort Overlay top kernels or top dispatches: (DEFAULT: kernels) + kernels + dispatches + -m , --mem-level Filter by memory level: (DEFAULT: ALL) + HBM + L2 + vL1D + LDS + --device GPU device ID. (DEFAULT: ALL) + --kernel-names Include kernel names in roofline plot. +``` + +The following sample command profiles the *vcopy* workload. 
+ +**vcopy profiling:** +```shell-session +$ omniperf profile --name vcopy -- ./vcopy 1048576 256 +Resolving rocprof +ROC Profiler: /usr/bin/rocprof + + +------------- +Profile only +------------- + +omniperf ver: 1.0.8-PR1 +Path: /home/colramos/GitHub/omniperf-pub/workloads +Target: mi200 +Command: /home/colramos/vcopy 1048576 256 +Kernel Selection: None +Dispatch Selection: None +IP Blocks: All +Log: /home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/log.txt + +/home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/perfmon/SQ_INST_LEVEL_SMEM.txt +RPL: on '230411_165021' from '/opt/rocm-5.2.1' in '/home/colramos/GitHub/omniperf-pub' +RPL: profiling '""/home/colramos/vcopy 1048576 256""' +RPL: input file '/home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/perfmon/SQ_INST_LEVEL_SMEM.txt' +RPL: output dir '/tmp/rpl_data_230411_165021_26406' +RPL: result dir '/tmp/rpl_data_230411_165021_26406/input0_results_230411_165021' +Finished allocating vectors on the CPU +ROCProfiler: input from "/tmp/rpl_data_230411_165021_26406/input0.xml" + gpu_index = + kernel = + range = + 3 metrics + SQ_INSTS_SMEM, SQ_INST_LEVEL_SMEM, SQ_ACCUM_PREV_HIRES +Finished allocating vectors on the GPU +Finished copying vectors to the GPU +sw thinks it moved 1.000000 KB per wave +Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384: +Launching the kernel on the GPU +Finished executing kernel +Finished copying the output vector from the GPU to the CPU +Releasing GPU memory +Releasing CPU memory + +... ... +ROCPRofiler: 1 contexts collected, output directory /tmp/rpl_data_220527_130317_1787038/input_results_220527_130317 +File 'workloads/vcopy/mi200/timestamps.csv' is generating +Total detected GPU devices: 2 +GPU Device 0: Profiling... 
+ 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +HBM BW, GPU ID: 0, workgroupSize:256, workgroups:2097152, experiments:100, traffic:8589934592 bytes, duration:6.2 ms, mean:1382.7 GB/sec, stdev=2.4 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +L2 BW, GPU ID: 0, workgroupSize:256, workgroups:8192, experiments:100, traffic:687194767360 bytes, duration:157.9 ms, mean:4358.7 GB/sec, stdev=4.7 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +L1 BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:26843545600 bytes, duration:3.3 ms, mean:8247.1 GB/sec, stdev=5.1 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +LDS BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:33554432000 bytes, duration:2.4 ms, mean:14246.3 GB/sec, stdev=29.5 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak FLOPs (FP32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:274877906944, duration:14.507 ms, mean:18949.6 GFLOPS, stdev=4.5 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak FLOPs (FP64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:137438953472, duration:7.5 ms, mean:18308.197266.1 GFLOPS, stdev=3.6 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (BF16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:14.0 ms, mean:153574.8 GFLOPS, stdev=79.9 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:14.5 ms, mean:147680.1 GFLOPS, stdev=34.7 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, 
FLOP:536870912000, duration:14.5 ms, mean:37142.1 GFLOPS, stdev=8.4 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:268435456000, duration:7.3 ms, mean:36919.5 GFLOPS, stdev=14.1 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA IOPs (I8), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, IOP:2147483648000, duration:14.4 ms, mean:149570.6 GOPS, stdev=41.7 GOPS +GPU Device 1: Profiling... + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +HBM BW, GPU ID: 1, workgroupSize:256, workgroups:2097152, experiments:100, traffic:8589934592 bytes, duration:6.2 ms, mean:1382.7 GB/sec, stdev=2.9 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +L2 BW, GPU ID: 1, workgroupSize:256, workgroups:8192, experiments:100, traffic:687194767360 bytes, duration:157.6 ms, mean:4371.0 GB/sec, stdev=4.1 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +L1 BW, GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, traffic:26843545600 bytes, duration:3.2 ms, mean:8297.4 GB/sec, stdev=11.6 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +LDS BW, GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, traffic:33554432000 bytes, duration:1.8 ms, mean:18839.2 GB/sec, stdev=44.5 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak FLOPs (FP32), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, FLOP:274877906944, duration:14.441 ms, mean:19037.6 GFLOPS, stdev=2.7 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak FLOPs (FP64), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, FLOP:137438953472, duration:7.5 ms, mean:18402.255859.1 GFLOPS, stdev=20.1 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 
] +Peak MFMA FLOPs (BF16), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:13.9 ms, mean:154240.3 GFLOPS, stdev=119.3 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F16), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:14.5 ms, mean:148450.1 GFLOPS, stdev=112.6 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F32), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, FLOP:536870912000, duration:14.4 ms, mean:37335.2 GFLOPS, stdev=43.1 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F64), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, FLOP:268435456000, duration:7.2 ms, mean:37105.3 GFLOPS, stdev=39.5 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA IOPs (I8), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments:100, IOP:2147483648000, duration:14.3 ms, mean:150317.8 GOPS, stdev=203.5 GOPS +``` +You'll notice two stages in *default* Omniperf profiling. The first stage collects all the counters needed for Omniperf analysis (omitting any filters you've provided). The second stage collects data for the roofline analysis (this stage can be disabled using `--no-roof`) + +At the end of the profiling, all resulting csv files should be located in a SOC specific target directory, e.g.: + - "mi200" for the AMD Instinct (tm) MI-200 family of accelerators + - "mi100" for the AMD Instinct (tm) MI-100 family of accelerators +etc. The SOC names are generated as a part of Omniperf, and do not necessarily distinguish between different accelerators in the same family (e.g., an AMD Instinct (tm) MI-210 vs an MI-250) + +> Note: Additionally, you'll notice a few extra files. An SoC parameters file, *sysinfo.csv*, is created to reflect the target device settings. 
All profiling output is stored in *log.txt*. Roofline specific benchmark results are stored in *roofline.csv*. + +```shell +$ ls workloads/vcopy/mi200/ +total 112 +drwxrwxr-x 3 colramos colramos 4096 Apr 11 16:42 . +drwxrwxr-x 3 colramos colramos 4096 Apr 11 16:42 .. +-rw-rw-r-- 1 colramos colramos 40750 Apr 11 16:44 log.txt +drwxrwxr-x 2 colramos colramos 4096 Apr 11 16:42 perfmon +-rw-rw-r-- 1 colramos colramos 25877 Apr 11 16:42 pmc_perf.csv +-rw-rw-r-- 1 colramos colramos 1716 Apr 11 16:44 roofline.csv +-rw-rw-r-- 1 colramos colramos 429 Apr 11 16:42 SQ_IFETCH_LEVEL.csv +-rw-rw-r-- 1 colramos colramos 366 Apr 11 16:42 SQ_INST_LEVEL_LDS.csv +-rw-rw-r-- 1 colramos colramos 391 Apr 11 16:42 SQ_INST_LEVEL_SMEM.csv +-rw-rw-r-- 1 colramos colramos 384 Apr 11 16:42 SQ_INST_LEVEL_VMEM.csv +-rw-rw-r-- 1 colramos colramos 509 Apr 11 16:42 SQ_LEVEL_WAVES.csv +-rw-rw-r-- 1 colramos colramos 498 Apr 11 16:42 sysinfo.csv +-rw-rw-r-- 1 colramos colramos 309 Apr 11 16:42 timestamps.csv +``` + +### Filtering +To reduce profiling time and the counters collected one may use profiling filters. Profiling filters and their functionality depend on the underlying profiler being used. While Omniperf is profiler agnostic, we've provided a detailed description of profiling filters available when using Omniperf with [rocProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html) below. + + + +Filtering Options: + +- The `-k` \ flag allows for kernel filtering. Usage is equivalent with the current rocprof utility ([see details below](#kernel-filtering)). + +- The `-d` \ flag allows for dispatch ID filtering. Usage is equivalent with the current rocprof utility ([see details below](#dispatch-filtering)). + +- The `-b` \ allows system profiling on one or more selected IP blocks to speed up the profiling process. One can gradually incorporate more IP blocks, without overwriting performance data acquired on other IP blocks. 
+ +```{note} +Be cautious while combining different profiling filters in the same call. Conflicting filters may result in error. + +i.e. filtering dispatch X, but dispatch X does not match your kernel name filter +``` + +#### IP Block Filtering +One can profile a selected IP Block to speed up the profiling process. All profiling results are accumulated in the same target directory, without overwriting those for other IP blocks, hence enabling the incremental profiling and analysis. + +The following example only gathers hardware counters for SQ and TCC, skipping all other IP Blocks: +```shell +$ omniperf profile --name vcopy -b SQ TCC -- ./sample/vcopy 1048576 256 +Resolving rocprof +ROC Profiler: /usr/bin/rocprof + + +------------- +Profile only +------------- + +omniperf ver: 1.0.8-PR1 +Path: /home/colramos/GitHub/omniperf-pub/workloads +Target: mi200 +Command: /home/colramos/vcopy 1048576 256 +Kernel Selection: None +Dispatch Selection: None +IP Blocks: ['SQ', 'TCC'] +fname: pmc_sq_perf2: Added +fname: pmc_td_perf: Skipped +fname: pmc_tcc2_perf: Skipped +fname: pmc_tcp_perf: Skipped +fname: pmc_spi_perf: Skipped +fname: pmc_sq_perf4: Added +fname: pmc_sqc_perf1: Skipped +fname: pmc_tcc_perf: Added +fname: pmc_cpf_perf: Skipped +fname: pmc_sq_perf8: Added +fname: pmc_cpc_perf: Skipped +fname: pmc_sq_perf1: Added +fname: pmc_ta_perf: Skipped +fname: pmc_sq_perf3: Added +fname: pmc_sq_perf6: Added +Log: /home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/log.txt +... +``` + +#### Kernel Filtering +Kernel filtering is based on the name of the kernel(s) you'd like to isolate. Use a kernel name substring list to isolate desired kernels. 
+ +The following example demonstrates profiling isolating the kernel matching substring "vecCopy": +```shell +$ omniperf profile --name vcopy -k vecCopy -- ./vcopy 1048576 256 +Resolving rocprof +ROC Profiler: /usr/bin/rocprof + + +------------- +Profile only +------------- + +omniperf ver: 1.0.8-PR1 +Path: /home/colramos/GitHub/omniperf-pub/workloads +Target: mi200 +Command: /home/colramos/vcopy 1048576 256 +Kernel Selection: ['vecCopy'] +Dispatch Selection: None +IP Blocks: All +Log: /home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/log.txt + +/home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/perfmon/SQ_INST_LEVEL_SMEM.txt +RPL: on '230411_170300' from '/opt/rocm-5.2.1' in '/home/colramos/GitHub/omniperf-pub' +RPL: profiling '""/home/colramos/vcopy 1048576 256""' +RPL: input file '/home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/perfmon/SQ_INST_LEVEL_SMEM.txt' +RPL: output dir '/tmp/rpl_data_230411_170300_29696' +RPL: result dir '/tmp/rpl_data_230411_170300_29696/input0_results_230411_170300' +Finished allocating vectors on the CPU +ROCProfiler: input from "/tmp/rpl_data_230411_170300_29696/input0.xml" + gpu_index = + kernel = vecCopy + +... ... +``` + +#### Dispatch Filtering +Dispatch filtering is based on the *global* dispatch index of kernels in a run. 
+ +The following example profiles only the 0th dispatched kernel in execution of the application: +```shell-session +$ omniperf profile --name vcopy -d 0 -- ./vcopy 1048576 256 +Resolving rocprof +ROC Profiler: /usr/bin/rocprof + + +------------- +Profile only +------------- + +omniperf ver: 1.0.8-PR1 +Path: /home/colramos/GitHub/omniperf-pub/workloads +Target: mi200 +Command: /home/colramos/vcopy 1048576 256 +Kernel Selection: None +Dispatch Selection: ['0'] +IP Blocks: All +Log: /home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/log.txt + +/home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/perfmon/SQ_INST_LEVEL_SMEM.txt +RPL: on '230411_170356' from '/opt/rocm-5.2.1' in '/home/colramos/GitHub/omniperf-pub' +RPL: profiling '""/home/colramos/vcopy 1048576 256""' +RPL: input file '/home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200/perfmon/SQ_INST_LEVEL_SMEM.txt' +RPL: output dir '/tmp/rpl_data_230411_170356_30314' +RPL: result dir '/tmp/rpl_data_230411_170356_30314/input0_results_230411_170356' +Finished allocating vectors on the CPU +ROCProfiler: input from "/tmp/rpl_data_230411_170356_30314/input0.xml" + gpu_index = + kernel = + range = 0 +... +``` + + + +### Standalone Roofline +If you're only interested in generating roofline analysis data try using `--roof-only`. This will only collect counters relevant to roofline, as well as generate a standalone .pdf output of your roofline plot. + +Standalone Roofline Options: + +- The `--sort` \ allows you to specify whether you'd like to overlay top kernel or top dispatch data in your roofline plot. + +- The `-m` \ allows you to specify specific level(s) of cache you'd like to include in your roofline plot. + +- The `--device` \ allows you to specify a device id to collect performance data from when running our roofline benchmark on your system. + +- If you'd like to distinguish different kernels in your .pdf roofline plot use `--kernel-names`. 
This will give each kernel a unique marker identifiable from the plot's key. + + +#### Roofline Only +The following example demonstrates profiling roofline data only: +```shell-session +$ omniperf profile --name vcopy --roof-only -- ./vcopy 1048576 256 +Resolving rocprof +ROC Profiler: /usr/bin/rocprof + + +-------- +Roofline only +-------- + +Checking for roofline.csv in /home/colramos/GitHub/omniperf-pub/workloads/vcopy/mi200 +No roofline data found. Generating... +Empirical Roofline Calculation +Copyright © 2022 Advanced Micro Devices, Inc. All rights reserved. +Total detected GPU devices: 4 +GPU Device 0: Profiling... + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + ... ... +Checking for roofline.csv in /home/colramos/GitHub/omniperf-pub/workloads/mix/mi200 +Checking for sysinfo.csv in /home/colramos/GitHub/omniperf-pub/workloads/mix/mi200 +Checking for pmc_perf.csv in /home/colramos/GitHub/omniperf-pub/workloads/mix/mi200 +Empirical Roofline PDFs saved! +``` +An inspection of our workload output folder shows .pdf plots were generated successfully +```shell-session +$ ls workloads/vcopy/mi200/ +total 176 +drwxrwxr-x 3 colramos colramos 4096 Apr 11 17:18 . +drwxrwxr-x 3 colramos colramos 4096 Apr 11 17:15 .. 
+-rw-rw-r-- 1 colramos colramos 13271 Apr 11 17:18 empirRoof_gpu-ALL_fp32.pdf +-rw-rw-r-- 1 colramos colramos 13175 Apr 11 17:18 empirRoof_gpu-ALL_int8_fp16.pdf +-rw-rw-r-- 1 colramos colramos 26560 Apr 11 17:16 log.txt +drwxrwxr-x 2 colramos colramos 4096 Apr 11 17:16 perfmon +-rw-rw-r-- 1 colramos colramos 54031 Apr 11 17:16 pmc_perf.csv +-rw-rw-r-- 1 colramos colramos 1714 Apr 11 17:16 roofline.csv +-rw-rw-r-- 1 colramos colramos 457 Apr 11 17:16 sysinfo.csv +-rw-rw-r-- 1 colramos colramos 37521 Apr 11 17:16 timestamps.csv +``` +A sample *empirRoof_gpu-ALL_fp32.pdf* looks something like this: + +![Sample Standalone Roof Plot](images/sample-roof-plot.png) diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/.gitignore b/projects/rocprofiler-compute/docs/archive/docs-2.x/.gitignore new file mode 100644 index 0000000000..df638b8b20 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/.gitignore @@ -0,0 +1,5 @@ +/build* +/_build +/_doxygen +/.gitinfo +/omniperf.dox diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/.nojekyll b/projects/rocprofiler-compute/docs/archive/docs-2.x/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/Makefile b/projects/rocprofiler-compute/docs/archive/docs-2.x/Makefile new file mode 100644 index 0000000000..c3854a5224 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = ../_build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/README b/projects/rocprofiler-compute/docs/archive/docs-2.x/README new file mode 100644 index 0000000000..d888e91be2 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/README @@ -0,0 +1,6 @@ +This subdirectory houses the input markup for Omniperf documentation using +Sphinx. Changes committed here on the main branch will automatically be built +and pushed live using a Github action. + +You can build a local copy of the documentation in this directory using +"make html" assuming you have the necessary sphinx dependencies installed. diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/VERSION b/projects/rocprofiler-compute/docs/archive/docs-2.x/VERSION new file mode 100644 index 0000000000..38f77a65b3 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/VERSION @@ -0,0 +1 @@ +2.0.1 diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/_static/css/custom.css b/projects/rocprofiler-compute/docs/archive/docs-2.x/_static/css/custom.css new file mode 100644 index 0000000000..d7bcbff234 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/_static/css/custom.css @@ -0,0 +1,7 @@ +.noscroll-table td { + white-space: normal !important; +} + +.wy-nav-content { + max-width: 75% !important; +} diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/analysis.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/analysis.md new file mode 100644 index 0000000000..5eed26e8bc --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/analysis.md @@ -0,0 +1,1038 @@ +# Analyze Mode + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` +Omniperf offers several ways to interact with the metrics it generates from profiling. 
The option you choose will likely be influenced by your familiarity with the profiled application, computing environment, and experience with Omniperf. + +While analyzing with the CLI offers quick and straightforward access to Omniperf metrics from terminal, the GUI adds an extra layer of styling and interactiveness some users may prefer. + +See sections below for more information on each. + +```{note} +Profiling results from the [aforementioned vcopy workload](profiling.md#workload-compilation) will be used in the following sections to demonstrate the use of Omniperf in MI GPU performance analysis. Unless otherwise noted, the performance analysis is done on the MI200 platform. +``` + +## CLI Analysis + +### Features + +- __Derived metrics__: All of Omniperf's built-in metrics. +- __Baseline comparison__: Compare multiple runs in a side-by-side manner. +- __Metric customization__: Isolate a subset of built-in metrics or build your own profiling configuration. +- __Filtering__: Hone in on a particular kernel, gpu-id, and/or dispatch-id via post-process filtering. + +Run `omniperf analyze -h` for more details. + +### Demo + +1) To begin, generate a high-level analysis report utilizing Omniperf's `-b` (a.k.a. `--block`) flag. +```shell-session +$ omniperf analyze -p workloads/vcopy/MI200/ -b 2 + + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +Analysis mode = cli +[analysis] deriving Omniperf metrics... + +-------------------------------------------------------------------------------- +0. 
Top Stats +0.1 Top Kernels +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ Kernel_Name │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ vecCopy(double*, double*, double*, int, │ 1.00 │ 20160.00 │ 20160.00 │ 20160.00 │ 100.00 │ +│ │ int) [clone .kd] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ +0.2 Dispatch List +╒════╤═══════════════╤══════════════════════════════════════════════════════════╤══════════╕ +│ │ Dispatch_ID │ Kernel_Name │ GPU_ID │ +╞════╪═══════════════╪══════════════════════════════════════════════════════════╪══════════╡ +│ 0 │ 0 │ vecCopy(double*, double*, double*, int, int) [clone .kd] │ 0 │ +╘════╧═══════════════╧══════════════════════════════════════════════════════════╧══════════╛ + + +-------------------------------------------------------------------------------- +2. 
System Speed-of-Light +2.1 Speed-of-Light +╒═════════════╤═══════════════════════════╤═════════╤══════════════════╤══════════╤═══════════════╕ +│ Metric_ID │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ +╞═════════════╪═══════════════════════════╪═════════╪══════════════════╪══════════╪═══════════════╡ +│ 2.1.0 │ VALU FLOPs │ 0.0 │ Gflop │ 22630.4 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.1 │ VALU IOPs │ 364.09 │ Giop │ 22630.4 │ 1.61 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.2 │ MFMA FLOPs (BF16) │ 0.0 │ Gflop │ 181043.2 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.3 │ MFMA FLOPs (F16) │ 0.0 │ Gflop │ 181043.2 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.4 │ MFMA FLOPs (F32) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.5 │ MFMA FLOPs (F64) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.6 │ MFMA IOPs (Int8) │ 0.0 │ Giop │ 181043.2 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.7 │ Active CUs │ 70.0 │ Cus │ 104.0 │ 67.31 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.8 │ SALU Utilization │ 3.78 │ Pct │ 100.0 │ 3.78 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.9 │ VALU Utilization │ 5.4 │ Pct │ 100.0 │ 5.4 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.10 │ MFMA Utilization │ 0.0 │ Pct │ 100.0 │ 0.0 │ 
+├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.11 │ VMEM Utilization │ 1.08 │ Pct │ 100.0 │ 1.08 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.12 │ Branch Utilization │ 1.08 │ Pct │ 100.0 │ 1.08 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.13 │ VALU Active Threads │ 64.0 │ Threads │ 64.0 │ 100.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.14 │ IPC │ 0.21 │ Instr/cycle │ 5.0 │ 4.13 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.15 │ Wavefront Occupancy │ 2488.86 │ Wavefronts │ 3328.0 │ 74.79 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.16 │ Theoretical LDS Bandwidth │ 0.0 │ Gb/s │ 22630.4 │ 0.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.17 │ LDS Bank Conflicts/Access │ │ Conflicts/access │ 32.0 │ │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.18 │ vL1D Cache Hit Rate │ 50.0 │ Pct │ 100.0 │ 50.0 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.19 │ vL1D Cache BW │ 1664.41 │ Gb/s │ 11315.2 │ 14.71 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.20 │ L2 Cache Hit Rate │ 35.74 │ Pct │ 100.0 │ 35.74 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.21 │ L2 Cache BW │ 1296.31 │ Gb/s │ 3481.6 │ 37.23 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.22 │ L2-Fabric Read BW │ 416.52 │ Gb/s │ 1638.4 │ 25.42 │ 
+├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.23 │ L2-Fabric Write BW │ 292.3 │ Gb/s │ 1638.4 │ 17.84 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.24 │ L2-Fabric Read Latency │ 262.85 │ Cycles │ │ │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.25 │ L2-Fabric Write Latency │ 307.4 │ Cycles │ │ │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.26 │ sL1D Cache Hit Rate │ 99.82 │ Pct │ 100.0 │ 99.82 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.27 │ sL1D Cache BW │ 208.05 │ Gb/s │ 6092.8 │ 3.41 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.28 │ L1I Hit Rate │ 99.91 │ Pct │ 100.0 │ 99.91 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.29 │ L1I BW │ 208.05 │ Gb/s │ 6092.8 │ 3.41 │ +├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤ +│ 2.1.30 │ L1I Fetch Latency │ 20.86 │ Cycles │ │ │ +╘═════════════╧═══════════════════════════╧═════════╧══════════════════╧══════════╧═══════════════╛ + +... +``` +2. Use `--list-metrics` to generate a list of available metrics for inspection +```shell-session +$ omniperf analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a + + ___ _ __ +/ _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| +\___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +Analysis mode = cli +[analysis] deriving Omniperf metrics... 
+0 -> Top Stats +1 -> System Info +2 -> System Speed-of-Light + 2.1 -> Speed-of-Light + 2.1.0 -> VALU FLOPs + 2.1.1 -> VALU IOPs + 2.1.2 -> MFMA FLOPs (BF16) + 2.1.3 -> MFMA FLOPs (F16) + 2.1.4 -> MFMA FLOPs (F32) + 2.1.5 -> MFMA FLOPs (F64) + 2.1.6 -> MFMA IOPs (Int8) + 2.1.7 -> Active CUs + 2.1.8 -> SALU Utilization + 2.1.9 -> VALU Utilization + 2.1.10 -> MFMA Utilization + 2.1.11 -> VMEM Utilization + 2.1.12 -> Branch Utilization + 2.1.13 -> VALU Active Threads + 2.1.14 -> IPC + 2.1.15 -> Wavefront Occupancy + 2.1.16 -> Theoretical LDS Bandwidth + 2.1.17 -> LDS Bank Conflicts/Access + 2.1.18 -> vL1D Cache Hit Rate + 2.1.19 -> vL1D Cache BW + 2.1.20 -> L2 Cache Hit Rate + 2.1.21 -> L2 Cache BW + 2.1.22 -> L2-Fabric Read BW + 2.1.23 -> L2-Fabric Write BW + 2.1.24 -> L2-Fabric Read Latency + 2.1.25 -> L2-Fabric Write Latency + 2.1.26 -> sL1D Cache Hit Rate + 2.1.27 -> sL1D Cache BW + 2.1.28 -> L1I Hit Rate + 2.1.29 -> L1I BW + 2.1.30 -> L1I Fetch Latency +... +``` +3. Choose your own customized subset of metrics with `-b` (a.k.a. `--block`), or build your own config following [config_template](https://github.com/ROCm/omniperf/blob/amd-mainline/src/rocprof_compute_analyze/configs/panel_config_template.yaml). Below shows how to generate a report containing only metric 2 (a.k.a. System Speed-of-Light). +```shell-session +$ omniperf analyze -p workloads/vcopy/MI200/ -b 2 +-------- +Analyze +-------- + +-------------------------------------------------------------------------------- +0. 
Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20000.00 │ 20000.00 │ 20000.00 │ 100.00 │ +│ │ int) [clone .kd] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +2. System Speed-of-Light +╒═════════╤═══════════════════════════╤═══════════════════════╤══════════════════╤════════════════════╤════════════════════════╕ +│ Index │ Metric │ Value │ Unit │ Peak │ PoP │ +╞═════════╪═══════════════════════════╪═══════════════════════╪══════════════════╪════════════════════╪════════════════════════╡ +│ 2.1.0 │ VALU FLOPs │ 0.0 │ Gflop │ 22630.4 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.1 │ VALU IOPs │ 367.0016 │ Giop │ 22630.4 │ 1.6217194570135745 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.2 │ MFMA FLOPs (BF16) │ 0.0 │ Gflop │ 90521.6 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.3 │ MFMA FLOPs (F16) │ 0.0 │ Gflop │ 181043.2 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.4 │ MFMA FLOPs (F32) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.5 │ MFMA FLOPs (F64) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ 
+├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.6 │ MFMA IOPs (Int8) │ 0.0 │ Giop │ 181043.2 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.7 │ Active CUs │ 74 │ Cus │ 104 │ 71.15384615384616 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.8 │ SALU Util │ 4.016057506716307 │ Pct │ 100 │ 4.016057506716307 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.9 │ VALU Util │ 5.737225009594725 │ Pct │ 100 │ 5.737225009594725 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.10 │ MFMA Util │ 0.0 │ Pct │ 100 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.11 │ VALU Active Threads/Wave │ 64.0 │ Threads │ 64 │ 100.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.12 │ IPC - Issue │ 1.0 │ Instr/cycle │ 5 │ 20.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.13 │ LDS BW │ 0.0 │ Gb/sec │ 22630.4 │ 0.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.14 │ LDS Bank Conflict │ │ Conflicts/access │ 32 │ │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.15 │ Instr Cache Hit Rate │ 99.91306912556854 │ Pct │ 100 │ 99.91306912556854 │ 
+├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.16 │ Instr Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.17 │ Scalar L1D Cache Hit Rate │ 99.81986908342313 │ Pct │ 100 │ 99.81986908342313 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.18 │ Scalar L1D Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.19 │ Vector L1D Cache Hit Rate │ 50.0 │ Pct │ 100 │ 50.0 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.20 │ Vector L1D Cache BW │ 1677.7216 │ Gb/s │ 11315.199999999999 │ 14.82714932126697 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.21 │ L2 Cache Hit Rate │ 35.55067615693325 │ Pct │ 100 │ 35.55067615693325 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.22 │ L2-Fabric Read BW │ 419.8496 │ Gb/s │ 1638.4 │ 25.6255859375 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.23 │ L2-Fabric Write BW │ 293.9456 │ Gb/s │ 1638.4 │ 17.941015625 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.24 │ L2-Fabric Read Latency │ 256.6482321288385 │ Cycles │ │ │ 
+├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.25 │ L2-Fabric Write Latency │ 317.2264255699014 │ Cycles │ │ │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.26 │ Wave Occupancy │ 1821.723057333852 │ Wavefronts │ 3328 │ 54.73927455931046 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.27 │ Instr Fetch BW │ 4.174722306564298e-08 │ Gb/s │ 3046.4 │ 1.3703789084047721e-09 │ +├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ +│ 2.1.28 │ Instr Fetch Latency │ 21.729248046875 │ Cycles │ │ │ +╘═════════╧═══════════════════════════╧═══════════════════════╧══════════════════╧════════════════════╧════════════════════════╛ +``` + +```{note} +Some cells may be blank indicating a missing/unavailable hardware counter or NULL value +``` + +3. Optimize application, iterate, and re-profile to inspect performance changes. +4. Redo a comprehensive analysis with Omniperf CLI at any milestone or at the end. + +### More options + +- __Single run__ + ```shell + $ omniperf analyze -p workloads/vcopy/MI200/ + ``` + +- __List top kernels and dispatches__ + ```shell + $ omniperf analyze -p workloads/vcopy/MI200/ --list-stats + ``` + +- __List metrics__ + + ```shell + $ omniperf analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a + ``` + +- __Show "System Speed-of-Light" and "CS_Busy" blocks only__ + + ```shell + $ omniperf analyze -p workloads/vcopy/MI200/ -b 2 5.1.0 + ``` + + ```{note} + Users can filter single metric or the whole hardware component by its id. In this case, 2 is the id for "system speed of light" and 5.1.0 the id for metric "GPU Busy Cycles". 
+ ``` + +- __Filter kernels__ + + First, list the top kernels in your application using `--list-stats`. + ```shell-session + $ omniperf analyze -p workloads/vcopy/MI200/ --list-stats + + Analysis mode = cli + [analysis] deriving Omniperf metrics... + + -------------------------------------------------------------------------------- + Detected Kernels (sorted descending by duration) + ╒════╤══════════════════════════════════════════════╕ + │ │ Kernel_Name │ + ╞════╪══════════════════════════════════════════════╡ + │ 0 │ vecCopy(double*, double*, double*, int, int) │ + ╘════╧══════════════════════════════════════════════╛ + + -------------------------------------------------------------------------------- + Dispatch list + ╒════╤═══════════════╤══════════════════════════════════════════════╤══════════╕ + │ │ Dispatch_ID │ Kernel_Name │ GPU_ID │ + ╞════╪═══════════════╪══════════════════════════════════════════════╪══════════╡ + │ 0 │ 0 │ vecCopy(double*, double*, double*, int, int) │ 0 │ + ╘════╧═══════════════╧══════════════════════════════════════════════╧══════════╛ + + ``` + + Second, select the index of the kernel you would like to filter (i.e. __vecCopy(double*, double*, double*, int, int) [clone .kd]__ at index __0__). Then, use this index to apply the filter via `-k/--kernels`. + + ```shell-session + $ omniperf analyze -p workloads/vcopy/MI200/ -k 0 + + Analysis mode = cli + [analysis] deriving Omniperf metrics... + + -------------------------------------------------------------------------------- + 0. 
Top Stats + 0.1 Top Kernels + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╤═════╕ + │ │ Kernel_Name │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ S │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╪═════╡ + │ 0 │ vecCopy(double*, double*, double*, int, │ 1.00 │ 18560.00 │ 18560.00 │ 18560.00 │ 100.00 │ * │ + │ │ int) │ │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╧═════╛ + ... ... + ``` + + ```{note} + You will see your filtered kernel(s) indicated by an asterisk in the Top Stats table + ``` + + +- __Baseline comparison__ + + ```shell + omniperf analyze -p workload1/path/ -p workload2/path/ + ``` + OR + ```shell + omniperf analyze -p workload1/path/ -k 0 -p workload2/path/ -k 1 + ``` + + +## GUI Analysis + +### Web-based GUI + +#### Features + +Omniperf's standalone GUI analyzer is a lightweight web page that can +be generated directly from the command-line. This option is provided +as an alternative for users wanting to explore profiling results +graphically, but without the additional setup requirements or +server-side overhead of Omniperf's detailed [Grafana +interface](analysis.md#grafana-based-gui) +option. The standalone GUI analyzer is provided as simple +[Flask](https://flask.palletsprojects.com/en/2.2.x/) application +allowing users to view results from within a web browser. + +```{admonition} Port forwarding + +Note that the standalone GUI analyzer publishes a web interface on port 8050 by default. +On production HPC systems where profiling jobs run +under the auspices of a resource manager, additional SSH tunneling +between the desired web browser host (e.g. login node or remote workstation) and compute host may be +required. Alternatively, users may find it more convenient to download +profiled workloads to perform analysis on their local system. 
+ +See [FAQ](faq.md) for more details on SSH tunneling. +``` + +#### Usage + +To launch the standalone GUI, include the `--gui` flag with your desired analysis command. For example: + +```shell-session +$ omniperf analyze -p workloads/vcopy/MI200/ --gui + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +Analysis mode = web_ui +[analysis] deriving Omniperf metrics... +Dash is running on http://0.0.0.0:8050/ + + * Serving Flask app 'rocprof_compute_analyze.analysis_webui' (lazy loading) + * Environment: production + WARNING: This is a development server. Do not use it in a production deployment. + Use a production WSGI server instead. + * Debug mode: off + * Running on all addresses (0.0.0.0) + WARNING: This is a development server. Do not use it in a production deployment. + * Running on http://127.0.0.1:8050 + * Running on http://10.228.33.172:8050 (Press CTRL+C to quit) +``` + +At this point, users can then launch their web browser of choice and +go to http://localhost:8050/ to see an analysis page. + + + +![Standalone GUI Homepage](images/standalone_gui.png) + +```{tip} +To launch the web application on a port other than 8050, include an optional port argument: +`--gui <desired port>` +``` + +When no filters are applied, users will see five basic sections derived from their application's profiling data: + +1. Memory Chart Analysis +2. Empirical Roofline Analysis +3. Top Stats (Top Kernel Statistics) +4. System Info +5. System Speed-of-Light + +To dive deeper, use the top drop down menus to isolate particular +kernel(s) or dispatch(es). You will then see the web page update with +metrics specific to the filter you have applied. + +Once you have applied a filter, you will also see several additional +sections become available with detailed metrics specific to that area +of AMD hardware. 
These detailed sections mirror the data displayed in +Omniperf's [Grafana +interface](analysis.md#grafana-based-gui). + +### Grafana-based GUI + +#### Features +The Omniperf Grafana GUI Analyzer supports the following features to facilitate MI GPU performance profiling and analysis: + +- System and Hardware Component (Hardware Block) Speed-of-Light (SOL) +- Multiple normalization options, including per-cycle, per-wave, per-kernel and per-second. +- Baseline comparisons +- Regex based Dispatch ID filtering +- Roofline Analysis +- Detailed performance counters and metrics per hardware component, e.g., + - Command Processor - Fetch (CPF) / Command Processor - Controller (CPC) + - Workgroup Manager (SPI) + - Shader Sequencer (SQ) + - Shader Sequencer Controller (SQC) + - L1 Address Processing Unit, a.k.a. Texture Addresser (TA) / L1 Backend Data Processing Unit, a.k.a. Texture Data (TD) + - L1 Cache (TCP) + - L2 Cache (TCC) (both aggregated and per-channel perf info) + +##### Speed-of-Light +Speed-of-light panels are provided at both the system and per hardware component level to help diagnose performance bottlenecks. The performance numbers of the workload under testing are compared to the theoretical maximum, (e.g. floating point operations, bandwidth, cache hit rate, etc.), to indicate the available room to further utilize the hardware capability. + +##### Multi Normalization + +Multiple performance number normalizations are provided to allow performance inspection within both HW and SW context. The following normalizations are permitted: +- per cycle +- per wave +- per kernel +- per second + +##### Baseline Comparison +Omniperf enables baseline comparison to allow checking A/B effect. Currently baseline comparison is limited to the same SoC. Cross comparison between SoCs is in development. 
+ +For both the Current Workload and the Baseline Workload, one can independently setup the following filters to allow fine grained comparisons: +- Workload Name +- GPU ID filtering (multi-selection) +- Kernel Name filtering (multi-selection) +- Dispatch ID filtering (Regex filtering) +- Omniperf Panels (multi-selection) + +##### Regex based Dispatch ID filtering +Omniperf enables Regular Expression (regex), a standard Linux string matching syntax, based dispatch ID filtering to flexibly choose the kernel invocations. One may refer to [Regex Numeric Range Generator](https://3widgets.com/), to generate typical number ranges. + +For example, if one wants to inspect Dispatch Range from 17 to 48, inclusive, the corresponding regex is : **(1[7-9]|[23]\d|4[0-8])**. The generated expression can be copied over for filtering. + +##### Incremental Profiling +Omniperf supports incremental profiling to significantly speed up performance analysis. + +> Refer to [*Hardware Component Filtering*](profiling.md#hardware-component-filtering) section for this command. + +By default, the entire application is profiled to collect performance counters for all hardware blocks, giving a complete view of where the workload stands in terms of performance optimization opportunities and bottlenecks. + +After that one may focus on only a few hardware components, (e.g., L1 Cache or LDS) to closely check the effect of software optimizations, without performing application replay for all other hardware components. This saves lots of compute time. In addition, the prior profiling results for other hardware components are not overwritten. Instead, they can be merged during the import to piece together the system view. + +##### Color Coding +The uniform color coding is applied to most visualizations (bars, table, diagrams etc). Typically, Yellow color means over 50%, while Red color means over 90%, for easy inspection. 
+ +##### Global Variables and Configurations + +![Grafana GUI Global Variables](images/global_variables.png) + +#### Grafana GUI Import +The omniperf database `--import` option imports the raw profiling data to Grafana's backend MongoDB database. This step is only required for Grafana GUI based performance analysis. + +Default username and password for MongoDB (to be used in database mode) are as follows: + + - Username: **temp** + - Password: **temp123** + +Each workload is imported to a separate database with the following naming convention: + + omniperf_<team>_<database>_<soc> + +e.g., omniperf_asw_vcopy_mi200. + +When using database mode, be sure to tailor the connection options to the machine hosting your [server-side instance](./installation.md). Below is the sample command to import the *vcopy* profiling data, let's assume our host machine is called "dummybox". + +```shell-session +$ omniperf database --help +usage: + +omniperf database [connection options] + + + +------------------------------------------------------------------------------- + +Examples: + + omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/ + + omniperf database --remove -H pavii1 -u temp -w omniperf_asw_sample_mi200 + +------------------------------------------------------------------------------- + + + +Help: + -h, --help show this help message and exit + +General Options: + -v, --version show program's version number and exit + -V, --verbose Increase output verbosity (use multiple times for higher levels) + -s, --specs Print system specs. + +Interaction Type: + -i, --import Import workload to Omniperf DB + -r, --remove Remove a workload from Omniperf DB + +Connection Options: + -H , --host Name or IP address of the server host. + -P , --port TCP/IP Port. (DEFAULT: 27018) + -u , --username Username for authentication. + -p , --password The user's password. (will be requested later if it's not set) + -t , --team Specify Team prefix. 
+ -w , --workload Specify name of workload (to remove) or path to workload (to import) + --kernel-verbose Specify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5) +``` + +**omniperf import for vcopy:** +```shell-session +$ omniperf database --import -H dummybox -u temp -t asw -w workloads/vcopy/mi200/ + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + + +Pulling data from /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +The directory exists +Found sysinfo file +KernelName shortening enabled +Kernel name verbose level: 2 +Password: +Password received +-- Conversion & Upload in Progress -- + 0%| | 0/11 [00:00 Note: The Memory Chart Analysis support multiple normalizations. Due to the space limit, all transactions, when normalized to per-sec, default to unit of Billion transactions per second. + +``` {figure} images/memory-chart_panel.png +:alt: Memory Chart Analysis +:figclass: figure +:align: center + +A graphical representation of performance data for memory blocks on the GPU. +``` + +##### Empirical Roofline Analysis +``` {figure} images/roofline_panel.png +:alt: Roofline Analysis +:figclass: figure +:align: center + +Visualize achieved performance relative to a benchmarked peak performance. 
+``` + +##### Command Processor +###### Command Processor Fetcher +``` {figure} images/cpc_panel.png +:alt: Command Processor Fetcher +:figclass: figure +:align: center + +Fetches commands out of memory to hand them over to the Command Processor Compute (CPC) for processing +``` +###### Command Processor Compute +``` {figure} images/cpf_panel.png +:alt: Command Processor Compute +:figclass: figure +:align: center + +The micro-controller running the command processing firmware that decodes the fetched commands, and (for kernels) passes them to the Workgroup Managers (SPIs) for scheduling. +``` + +##### Shader Processor Input (SPI) +###### SPI Stats +``` {figure} images/spi-stats_panel.png +:alt: SPI Stats +:figclass: figure +:align: center + +TODO: Add caption after merge +``` +###### SPI Resource Allocation +``` {figure} images/spi-resource-allocation_panel.png +:alt: SPI Resource Allocation +:figclass: figure +:align: center + +TODO: Add caption after merge +``` + +##### Wavefront +###### Wavefront Launch Stats +``` {figure} images/wavefront-launch-stats_panel.png +:alt: Wavefront Launch Stats +:figclass: figure +:align: center + +General information about the kernel launch. +``` +###### Wavefront Runtime Stats +``` {figure} images/wavefront-runtime-stats_panel.png +:alt: Wavefront Runtime Stats +:figclass: figure +:align: center + +High-level overview of the execution of wavefronts in a kernel. +``` + +##### Compute Unit - Instruction Mix +###### Instruction Mix +``` {figure} images/cu-inst-mix_panel.png +:alt: Instruction Mix +:figclass: figure +:align: center + +Breakdown of the various types of instructions executed by the user’s kernel, and which pipelines on the Compute Unit (CU) they were executed on. 
+``` +###### VALU Arithmetic Instruction Mix +``` {figure} images/cu-value-arith-instr-mix_panel.png +:alt: VALU Arithmetic Instruction Mix +:figclass: figure +:align: center + +The various types of vector instructions that were issued to the vector arithmetic logic unit (VALU). +``` +###### MFMA Arithmetic Instruction Mix +``` {figure} images/cu-mafma-arith-instr-mix_panel.png +:alt: MFMA Arithmetic Instruction Mix +:figclass: figure +:align: center + +The types of Matrix Fused Multiply-Add (MFMA) instructions that were issued. +``` +###### VMEM Arithmetic Instruction Mix +``` {figure} images/cu-vmem-instr-mix_panel.png +:alt: VMEM Arithmetic Instruction Mix +:figclass: figure +:align: center + +The types of vector memory (VMEM) instructions that were issued. +``` + +##### Compute Unit - Compute Pipeline +###### Speed-of-Light +``` {figure} images/cu-sol_panel.png +:alt: Speed-of-Light +:figclass: figure +:align: center + +The number of floating-point and integer operations executed on the vector arithmetic logic unit (VALU) and Matrix Fused Multiply-Add (MFMA) units in various precisions. +``` +###### Pipeline Stats +``` {figure} images/cu-pipeline-stats_panel.png +:alt: Pipeline Stats +:figclass: figure +:align: center + +More detailed metrics to analyze the several independent pipelines found in the Compute Unit (CU). +``` +###### Arithmetic Operations +``` {figure} images/cu-arith-ops_panel.png +:alt: Arithmetic Operations +:figclass: figure +:align: center + +The total number of floating-point and integer operations executed in various precisions. +``` + +##### Local Data Share (LDS) +###### Speed-of-Light +``` {figure} images/lds-sol_panel.png +:alt: Speed-of-Light +:figclass: figure +:align: center + +Key metrics for the Local Data Share (LDS) as a comparison with the peak achievable values of those metrics. 
+``` +###### LDS Stats +``` {figure} images/lds-stats_panel.png +:alt: LDS Stats +:figclass: figure +:align: center + +More detailed view of the Local Data Share (LDS) performance. +``` + +##### Instruction Cache +###### Speed-of-Light +``` {figure} images/instr-cache-sol_panel.png +:alt: Speed-of-Light +:figclass: figure +:align: center + +Key metrics of the L1 Instruction (L1I) cache as a comparison with the peak achievable values of those metrics. +``` +###### Instruction Cache Stats +``` {figure} images/instr-cache-accesses_panel.png +:alt: Instruction Cache Stats +:figclass: figure +:align: center + +More detail on the hit/miss statistics of the L1 Instruction (L1I) cache. +``` + +##### Scalar L1D Cache +###### Speed-of-Light +``` {figure} images/sl1d-sol_panel.png +:alt: Speed-of-Light +:figclass: figure +:align: center + +Key metrics of the Scalar L1 Data (sL1D) cache as a comparison with the peak achievable values of those metrics. +``` +###### Scalar L1D Cache Accesses +``` {figure} images/sl1d-cache-accesses_panel.png +:alt: Scalar L1D Cache Accesses +:figclass: figure +:align: center + +More detail on the types of accesses made to the Scalar L1 Data (sL1D) cache, and the hit/miss statistics. +``` +###### Scalar L1D Cache - L2 Interface +``` {figure} images/sl1d-l12-interface_panel.png +:alt: Scalar L1D Cache - L2 Interface +:figclass: figure +:align: center + +More detail on the data requested across the Scalar L1 Data (sL1D) cache <-> L2 interface. +``` + +##### Texture Address and Texture Data +###### Texture Addresser +``` {figure} images/ta_panel.png +:alt: Texture Addresser +:figclass: figure +:align: center + +Metric specific to texture addresser (TA) which receives commands (e.g., instructions) and write/atomic data from the Compute Unit (CU), and coalesces them into fewer requests for the cache to process. 
+``` +###### Texture Data +``` {figure} images/td_panel.png +:alt: Texture Data +:figclass: figure +:align: center + +Metrics specific to texture data (TD) which routes data back to the requesting Compute Unit (CU). +``` + +##### Vector L1 Data Cache +###### Speed-of-Light +``` {figure} images/vl1d-sol_panel.png +:alt: Speed-of-Light +:figclass: figure +:align: center + +Key metrics of the vector L1 data (vL1D) cache as a comparison with the peak achievable values of those metrics. +``` +###### L1D Cache Stalls +``` {figure} images/vl1d-cache-stalls_panel.png +:alt: L1D Cache Stalls +:figclass: figure +:align: center + +More detail on where vector L1 data (vL1D) cache is stalled in the pipeline, which may indicate performance limiters of the cache. +``` +###### L1D Cache Accesses +``` {figure} images/vl1d-cache-accesses_panel.png +:alt: L1D Cache Accesses +:figclass: figure +:align: center + +The type of requests incoming from the cache frontend, the number of requests that were serviced by the vector L1 data (vL1D) cache, and the number & type of outgoing requests to the L2 cache. +``` +###### L1D - L2 Transactions +``` {figure} images/vl1d-l2-transactions_panel.png +:alt: L1D - L2 Transactions +:figclass: figure +:align: center + +A more granular look at the types of requests made to the L2 cache. +``` +###### L1D Addr Translation +``` {figure} images/vl1d-addr-translation_panel.png +:alt: L1D Addr Translation +:figclass: figure +:align: center + +After a vector memory instruction has been processed/coalesced by the address processing unit of the vector L1 data (vL1D) cache, it must be translated from a virtual to physical address. These metrics provide more details on the L1 Translation Lookaside Buffer (TLB) which handles this process. 
+``` + +##### L2 Cache +###### Speed-of-Light +``` {figure} images/l2-sol_panel.png +:alt: Speed-of-Light +:figclass: figure +:align: center + +Key metrics about the performance of the L2 cache, aggregated over all the L2 channels, as a comparison with the peak achievable values of those metrics. +``` +###### L2 Cache Accesses +``` {figure} images/l2-accesses_panel.png +:alt: L2 Cache Accesses +:figclass: figure +:align: center + +Incoming requests to the L2 cache from the vector L1 data (vL1D) cache and other clients (e.g., the sL1D and L1I caches). +``` +###### L2 - Fabric Transactions +``` {figure} images/l2-fabric-transactions_panel.png +:alt: L2 - Fabric Transactions +:figclass: figure +:align: center + +More detail on the flow of requests through Infinity Fabric™. +``` +###### L2 - Fabric Interface Stalls +``` {figure} images/l2-fabric-interface-stalls_panel.png +:alt: L2 - Fabric Interface Stalls +:figclass: figure +:align: center + +A breakdown of what types of requests in a kernel caused a stall (e.g., read vs write), and to which locations (e.g., to the accelerator’s local memory, or to remote accelerators/CPUs). +``` + +##### L2 Cache Per Channel +###### Aggregate Stats +``` {figure} images/l2-per-channel-agg-stats_panel.png +:alt: Aggregate Stats +:figclass: figure +:align: center + +L2 Cache per channel performance at a glance. Metrics are aggregated over all available channels. +``` diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/conf.py b/projects/rocprofiler-compute/docs/archive/docs-2.x/conf.py new file mode 100644 index 0000000000..1bf84fe5bd --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/conf.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. 
For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +import subprocess as sp +import sys + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use str(Path().absolute().resolve()) to make it absolute, like shown here. +# +from pathlib import Path + +sys.path.insert(0, str(Path("..").absolute().resolve())) + +repo_version = "unknown" +# Determine short version by file in repo +if Path("./VERSION").is_file(): + with open("./VERSION") as f: + repo_version = f.readline().strip() + + +def install(package): + sp.call([sys.executable, "-m", "pip", "install", package]) + + +# -- Project information ----------------------------------------------------- + +project = "Omniperf" +copyright = "2023-2024, Advanced Micro Devices, Inc. All Rights Reserved" +author = "AMD Research" + +# The short X.Y version +version = repo_version +# The full version, including alpha/beta/rc tags +release = repo_version + +# -- General configuration --------------------------------------------------- + +install("sphinx_rtd_theme") + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.githubpages", + "myst_parser", + "sphinxmark", +] + +show_authors = True + + +myst_heading_anchors = 4 +# enable replacement of (tm) & friends +myst_enable_extensions = ["replacements", "dollarmath"] + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. 
+# You can specify multiple suffix as a list of string: +source_suffix = { + ".rst": "restructuredtext", + ".txt": "markdown", + ".md": "markdown", +} + +# sphinxmark_enable = True +# sphinxmark_image = "text" +# sphinxmark_text = "Release Candidate" +# sphinxmark_text_size = 80 +# sphinxmark_div = "document" +# sphinxmark_fixed = False +# sphinxmark_text_rotation = 30 +# sphinxmark_text_color = (128, 128, 128) +# sphinxmark_text_spacing = 800 +# sphinxmark_text_opacity = 30 + +from recommonmark.parser import CommonMarkParser + +source_parsers = {".md": CommonMarkParser} + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + +# options for latex output +latex_engine = "lualatex" +latex_show_urls = "footnote" + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +latex_elements = { + "sphinxsetup": "verbatimwrapslines=true, verbatimforcewraps=true", +} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "Omniperfdoc" + +html_logo = "images/amd-header-logo.svg" +html_theme_options = { + "analytics_id": "G-C5DYLCE9ED", # Provided by Google in your dashboard + "analytics_anonymize_ip": False, + "logo_only": False, + "display_version": True, + "prev_next_buttons_location": "bottom", + "style_external_links": False, + "vcs_pageview_mode": "", + # 'style_nav_header_background': 'white', + # Toc options + "collapse_navigation": True, + "sticky_navigation": True, + "navigation_depth": 5, + "includehidden": True, + "titles_only": False, +} + +from pygments.styles import get_all_styles + +# The name of the Pygments (syntax highlighting) style to use. 
+styles = list(get_all_styles()) +preferences = ("emacs", "pastie", "colorful") +for pref in preferences: + if pref in styles: + pygments_style = pref + break + +from recommonmark.transform import AutoStructify + + +# app setup hook +def setup(app): + app.add_config_value( + "recommonmark_config", + { + "auto_toc_tree_section": "Contents", + "enable_eval_rst": True, + "enable_auto_doc_ref": False, + }, + True, + ) + app.add_transform(AutoStructify) + app.add_config_value("docstring_replacements", {}, True) + app.connect("source-read", replaceString) + app.add_css_file("css/custom.css") + + +# function to replace version string throughout documentation + + +def replaceString(app, docname, source): + result = source[0] + for key in app.config.docstring_replacements: + result = result.replace(key, app.config.docstring_replacements[key]) + source[0] = result + + +docstring_replacements = {"{__VERSION__}": version} diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/faq.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/faq.md new file mode 100644 index 0000000000..77101dc2e2 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/faq.md @@ -0,0 +1,67 @@ +# FAQ + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +**1. How do I export profiling data I have already generated using Omniperf?** + +In order to interact with the Grafana GUI you must sync data with the MongoDB backend. This interaction is done through ***database*** mode. + +Simply pass the directory of your desired workload like so, +```shell +$ omniperf database --import -w <path-to-workload> -H <hostname> -u <username> -t <team-name> +``` +**2. python ast error: 'Constant' object has no attribute 'kind'** + +This comes from a bug in the default astunparse 1.6.3 with python 3.8. Seems good with python 3.7 and 3.9. + +Workaround: +```shell +$ pip3 uninstall astunparse +$ pip3 install astunparse +``` + +**3. tabulate doesn't print properly** +Workaround: +```shell +$ export LC_ALL=C.UTF-8 +$ export LANG=C.UTF-8 +``` + +**4. 
How can I SSH Tunnel in MobaXterm?** + +1. Open MobaXterm +2. In the top ribbon, select `Tunneling` +``` {image} images/tunnel_demo1.png +:alt: MobaXterm Tunnel Button +:class: bg-primary +:align: center +``` +This pop up will appear +``` {image} images/tunnel_demo2.png +:alt: MobaXterm Pop Up +:class: bg-primary +:align: center +``` +3. Press `New SSH tunnel` +``` {image} images/tunnel_demo3.png +:alt: MobaXterm Pop Up +:class: bg-primary +:align: center +``` +4. Configure tunnel accordingly + + Local clients + - Forwarded Port: [PORT] + + Remote Server + - Remote Server: localhost + - Remote Port: [PORT] + + SSH Server + - SSH server: Name of the server one is connecting to + - SSH login: Username to login to the server + - SSH port: 22 diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md new file mode 100644 index 0000000000..9c8740de19 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md @@ -0,0 +1,112 @@ +# Getting Started + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +## Quickstart + +1. **Launch & Profile the target application with the command line profiler** + + The command line profiler launches the target application, calls the rocProfiler API via the rocProf binary, and collects profile results for the specified kernels, dispatches, and/or hardware components. If not specified, Omniperf will default to collecting all available counters for all kernels/dispatches launched by the user's executable. + + To collect the default set of data for all kernels in the target application, launch, e.g.: + ```shell + $ omniperf profile -n vcopy_data -- ./vcopy -n 1048576 -b 256 + ``` + The app runs, each kernel is launched, and profiling results are generated. 
By default, results are written to a subdirectory with your accelerator's name e.g., ./workloads/vcopy_data/MI200/ (where name is configurable via the `-n` argument). + + ```{note} + To collect all requested profile information, it may be required to replay kernels multiple times. + ``` + +2. **Customize data collection** + + Options are available to specify for which kernels/metrics data should be collected. + Note that filtering can be applied either in the profiling or analysis stage, however filtering during profiling collection will often speed up your overall profiling run time. + + Some common filters include: + + - `-k`/`--kernel` enables filtering kernels by name. + - `-d`/`--dispatch` enables filtering based on dispatch ID. + - `-b`/`--block` collects metrics for only the specified (one or more) hardware component blocks. + + To view available metrics by hardware Block you can use the `--list-metrics` argument: + ```shell + $ omniperf analyze --list-metrics <sys_arch> + ``` + +3. **Analyze at the command line** + + After generating a local output folder (e.g. ./workloads/vcopy_data/MI200), the command line tool can also be used to quickly interface with profiling results. View different metrics derived from your profiled results and get immediate access to all metrics organized by hardware blocks. + + If no kernel, dispatch, or hardware block filters are applied at this stage, analysis will be reflective of the entirety of the profiling data. + + To interact with profiling results from a different session, users just provide the workload path. `-p`/`--path` enables users to analyze existing profiling data in the Omniperf CLI. + +4. **Analyze in the Grafana GUI** + + To conduct a more in-depth analysis of profiling results we recommend users utilize the Omniperf Grafana GUI. To interact with profiling results, users must import their data to the MongoDB instance included in the Omniperf dockerfile. 
+ + To interact with Grafana GUI data, stored in the Omniperf DB, users can enter ***database*** mode. For example: + ```shell + $ omniperf database --import [CONNECTION OPTIONS] + ``` + +## Usage + +### Modes +Modes change the fundamental behavior of the Omniperf command line tool. Depending on which mode is chosen, different command line options become available. + +- **Profile**: Target application is launched on the local system using AMD’s [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). Depending on the profiling options chosen, selected kernels, dispatches, and/or hardware components in the application are profiled and results are stored locally in an output folder (./workloads/\<name>). + + ```shell + $ omniperf profile --help + ``` + +- **Analyze**: Profiling data from `-p`/`--path` directory is loaded into the Omniperf CLI analyzer where users have immediate access to profiling results and generated metrics. Metrics are quickly generated from the entirety of your profiled application or a subset you’ve identified through the Omniperf CLI analysis filters. + + To generate a lightweight GUI interface users can add the `--gui` flag to their analysis command. + + This mode is designed to be a middle ground to the highly detailed Omniperf Grafana GUI and is great for users who want immediate access to a hardware component they’re already familiar with. + + ```shell + $ omniperf analyze --help + ``` + +- **Database**: Our detailed Grafana GUI is built on a MongoDB database. `--import` profiling results to the DB to interact with the workload in Grafana or `--remove` the workload from the DB. + + Connection options will need to be specified. See the [*Grafana + Analysis*](analysis.md#grafana-gui-import) import section + for more details on this. + + ```shell + $ omniperf database --help + ``` +### Global Options +The Omniperf command line tool has a set of 'global' options that are available across all modes. 
+ +| Argument | Description | +| :----------------- | :---------------------------------------------------------------- | +| `-v` / `--version` | Print Omniperf version and exit. | +| `-V` / `--verbose` | Increase output verbosity (use multiple times for higher levels). | +| `-q` / `--quiet` | Reduce output and run quietly. | +| `-s` / `--specs` | Print system specs and exit. | + +```{note} +Omniperf also recognizes the project variable, `OMNIPERF_COLOR`, should the user choose to disable colorful output. To disable default colorful behavior, set this variable to `0`. +``` + + +## Basic Operations + +| Operation | Mode | Required Arguments | +| :--------------------------------------- | :------- | :--------------------------------------------------------- | +| Profile a workload | profile | `--name`, `-- <profile_cmd>` | +| Standalone roofline analysis | profile | `--name`, `--roof-only`, `-- <profile_cmd>` | +| Import a workload to database | database | `--import`, `--host`, `--username`, `--workload`, `--team` | +| Remove a workload from database | database | `--remove`, `--host`, `--username`, `--workload`, `--team` | +| Launch standalone GUI from CLI | analyze | `--path`, `--gui` | +| Interact with profiling results from CLI | analyze | `--path` | diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/high_level_design.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/high_level_design.md new file mode 100644 index 0000000000..4ddb3a1e82 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/high_level_design.md @@ -0,0 +1,22 @@ +# High Level Design + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +The [Omniperf](https://github.com/ROCm/omniperf) Tool is architecturally composed of three major components, as shown in the following figure. + +- **Omniperf Profiling**: Acquire raw performance counters via application replay based on [rocProf](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html). 
The counters are stored in a comma-separated format, for further analysis. A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. The roofline model is not available on earlier accelerators. + +- **Omniperf Grafana Analyzer**: + - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. Compatibility of previously generated data between Omniperf versions is not necessarily guaranteed. + - *Grafana GUI Analyzer*: A Grafana dashboard is designed to retrieve the raw counters info from the backend database. It also creates the relevant performance metrics and visualization. +- **Omniperf Standalone GUI Analyzer**: A standalone GUI is provided to enable performance analysis without importing data into the backend database. + +![Omniperf Architectural Diagram](images/omniperf_server_vs_client_install.png) + +```{note} +To learn more about the client vs. server model of Omniperf and our install process please see the [Deployment section](./installation.md) of the docs. 
+``` diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Current_and_baseline_dispatch_ids.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Current_and_baseline_dispatch_ids.png new file mode 100644 index 0000000000..811bf99692 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Current_and_baseline_dispatch_ids.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Kernel_time_histogram.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Kernel_time_histogram.png new file mode 100644 index 0000000000..8ec0fd83ba Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Kernel_time_histogram.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L1_l2_transactions_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L1_l2_transactions_per_channel.png new file mode 100644 index 0000000000..7b839ab0fe Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L1_l2_transactions_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_latencies_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_latencies_per_channel.png new file mode 100644 index 0000000000..a0b3471974 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_latencies_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_stalls_per_channel.png new file mode 100644 index 0000000000..ac1c5dffb1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_stalls_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_stalls_per_channel.png 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_stalls_per_channel.png new file mode 100644 index 0000000000..d5a1c2c072 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_stalls_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_starvation_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_starvation_per_channel.png new file mode 100644 index 0000000000..49d584621d Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_starvation_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Memory_latencies.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Memory_latencies.png new file mode 100644 index 0000000000..3b97d72e0d Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Memory_latencies.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Roofline_analysis.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Roofline_analysis.png new file mode 100644 index 0000000000..36efd2ea77 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Roofline_analysis.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_dispatches.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_dispatches.png new file mode 100644 index 0000000000..31d13a0a2f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_dispatches.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_kernels.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_kernels.png new file mode 100644 index 0000000000..17b8ef7da2 Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_kernels.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/amd-header-logo.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/amd-header-logo.svg new file mode 100644 index 0000000000..6fc59dddd9 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/amd-header-logo.svg @@ -0,0 +1 @@ +AMD-logo-white-v2 diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpc_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpc_panel.png new file mode 100644 index 0000000000..7b7f758588 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpc_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpf_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpf_panel.png new file mode 100644 index 0000000000..a43b878536 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpf_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-arith-ops_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-arith-ops_panel.png new file mode 100644 index 0000000000..073b64d707 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-arith-ops_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-inst-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-inst-mix_panel.png new file mode 100644 index 0000000000..1b9a6d2b25 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-inst-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-mafma-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-mafma-arith-instr-mix_panel.png new file mode 100644 index 
0000000000..d74dfd271a Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-mafma-arith-instr-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-pipeline-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-pipeline-stats_panel.png new file mode 100644 index 0000000000..6f572f9148 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-pipeline-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-sol_panel.png new file mode 100644 index 0000000000..8e8f46174f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-value-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-value-arith-instr-mix_panel.png new file mode 100644 index 0000000000..de3750d2d0 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-value-arith-instr-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-vmem-instr-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-vmem-instr-mix_panel.png new file mode 100644 index 0000000000..1d6ce1bc46 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-vmem-instr-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_config.jpg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_config.jpg new file mode 100644 index 0000000000..4210d9036b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_config.jpg differ diff --git 
a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_settings.jpg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_settings.jpg new file mode 100644 index 0000000000..f472362544 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_settings.jpg differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.png new file mode 100644 index 0000000000..826b4d9de7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.svg new file mode 100644 index 0000000000..1c98d20810 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.svg @@ -0,0 +1,899 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Total Fabric Requests + + + + + 32B Read Requests + + + + 64B Read Requests + + + + 32B Write Requests + + + + + + 64B Write Requests + + + + + + Uncached Read Requests + + + x2 + + + + Uncached Write Requests + + + + + + Atomic +Requests + + + + + + HBM Read +Requests + + + + + Remote Read +Requests + + + + + + + + + + + + + + + + + + + HBM Write Requests + + + + Remote Write Requests + + + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fig_level_counter.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fig_level_counter.png new file mode 100755 index 0000000000..fa50539a0c Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fig_level_counter.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/gcn_compute_unit.png 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/gcn_compute_unit.png new file mode 100644 index 0000000000..e6c1f2eb07 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/gcn_compute_unit.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/global_variables.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/global_variables.png new file mode 100644 index 0000000000..87f49b5e14 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/global_variables.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_welcome.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_welcome.png new file mode 100644 index 0000000000..e564c0a389 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_welcome.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_workload_selection.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_workload_selection.png new file mode 100644 index 0000000000..3ecdc35e72 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_workload_selection.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/import_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/import_dashboard.png new file mode 100644 index 0000000000..29be7ea584 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/import_dashboard.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/install_decision_tree.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/install_decision_tree.png new file mode 100644 index 0000000000..1c62fba87b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/install_decision_tree.png differ diff --git 
a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-accesses_panel.png new file mode 100644 index 0000000000..926a7805e7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-sol_panel.png new file mode 100644 index 0000000000..64be7178c6 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.png new file mode 100644 index 0000000000..fdabfbb955 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.svg new file mode 100644 index 0000000000..dd22a71319 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.svg @@ -0,0 +1,584 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Compute Unit + Cmd/Data + + + + Address Processing Unit + + + Sync + Data Processing Unit + + Virtual To Physical Address Translation + + Tag RAM + + L1 Cache Controller + + CacheRAM + + L2 Memory Interface + Data + + Bus + + L2 Cache + + + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-accesses_panel.png new file mode 100644 index 0000000000..101cf77530 Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-interface-stalls_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-interface-stalls_panel.png new file mode 100644 index 0000000000..b1bd415ca3 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-interface-stalls_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-transactions_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-transactions_panel.png new file mode 100644 index 0000000000..7df5a78095 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-transactions_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-per-channel-agg-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-per-channel-agg-stats_panel.png new file mode 100644 index 0000000000..704d45c69f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-per-channel-agg-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-sol_panel.png new file mode 100644 index 0000000000..646e608cbc Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-sol_panel.png new file mode 100644 index 0000000000..c261513aa9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-stats_panel.png 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-stats_panel.png new file mode 100644 index 0000000000..0d9d419eb7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.png new file mode 100644 index 0000000000..f444eaf539 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.svg new file mode 100644 index 0000000000..c0adb5e912 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.svg @@ -0,0 +1,393 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SIMD 0/1 + SIMD 2/3 + + + + + + Conflict Detection + + + + Scheduler + + + + Bank 0 + + + + Bank 1 + + + + Bank 2 + + + + Bank 3 + + + + Bank 31 + + ... 
+ + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.png new file mode 100644 index 0000000000..bd74d62499 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.svg new file mode 100644 index 0000000000..a22a7b84d7 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.svg @@ -0,0 +1,1579 @@ + + + + + + + + 2023-08-21T11:00:20.650499 + image/svg+xml + + + Matplotlib v3.7.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.png new file mode 100644 index 0000000000..ab057f3cd9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.svg new file mode 100644 index 0000000000..c86b4125e4 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.svg @@ -0,0 +1,1050 @@ + + + + + + + + 2023-08-21T11:43:04.336525 + image/svg+xml + + + Matplotlib v3.7.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.png new file mode 100644 index 0000000000..77c0938581 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.svg new file mode 100644 index 0000000000..147da6aa42 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.svg @@ -0,0 +1,1145 @@ + + + + + + + + 2023-08-17T18:14:36.907658 + image/svg+xml + + + Matplotlib v3.7.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/memory-chart_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/memory-chart_panel.png new file mode 100644 index 0000000000..1091a50329 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/memory-chart_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.png new file mode 100644 index 0000000000..a8e5f01649 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.svg new file mode 100644 index 0000000000..d0d9606be5 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_architecture.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_architecture.png new file mode 100644 index 0000000000..966ac2d608 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_architecture.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_server_vs_client_install.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_server_vs_client_install.png new file mode 100644 index 0000000000..8c43dba9e2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_server_vs_client_install.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/opening_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/opening_dashboard.png new file mode 100644 index 0000000000..5e6c7ea625 Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/opening_dashboard.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/roofline_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/roofline_panel.png new file mode 100644 index 0000000000..47ee9bddb1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/roofline_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sample-roof-plot.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sample-roof-plot.png new file mode 100644 index 0000000000..2deaba7ad2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sample-roof-plot.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/selayout.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/selayout.png new file mode 100644 index 0000000000..73aa2b49de Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/selayout.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-cache-accesses_panel.png new file mode 100644 index 0000000000..3605cce8a2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-cache-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-l12-interface_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-l12-interface_panel.png new file mode 100644 index 0000000000..5c3480ac9f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-l12-interface_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-sol_panel.png new file mode 100644 index 
0000000000..92fa5a1a4a Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sol_panel.png new file mode 100644 index 0000000000..f456500e02 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-resource-allocation_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-resource-allocation_panel.png new file mode 100644 index 0000000000..bee869ad10 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-resource-allocation_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-stats_panel.png new file mode 100644 index 0000000000..19c7ad3645 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.png new file mode 100644 index 0000000000..cca71eb2a4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.svg new file mode 100644 index 0000000000..b033a9e111 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.svg @@ -0,0 +1,64 @@ + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/standalone_gui.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/standalone_gui.png new file mode 100644 index 
0000000000..a8abd81694 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/standalone_gui.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/system-info_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/system-info_panel.png new file mode 100644 index 0000000000..5a5fa01187 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/system-info_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ta_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ta_panel.png new file mode 100644 index 0000000000..2f08f9a6b1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ta_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/td_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/td_panel.png new file mode 100644 index 0000000000..819407515b Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/td_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/top-stat_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/top-stat_panel.png new file mode 100644 index 0000000000..5e3dddca2f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/top-stat_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo1.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo1.png new file mode 100644 index 0000000000..bda64883c4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo1.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo2.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo2.png new file mode 100644 index 0000000000..8b2d258521 Binary files 
/dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo2.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo3.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo3.png new file mode 100644 index 0000000000..76cd7ed9a9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo3.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.png new file mode 100644 index 0000000000..f770a1b291 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.svg new file mode 100644 index 0000000000..53affd4fc6 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + x2 + + diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-addr-translation_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-addr-translation_panel.png new file mode 100644 index 0000000000..0fb4aaf076 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-addr-translation_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-accesses_panel.png new file mode 100644 index 0000000000..5259b2214f Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-stalls_panel.png 
b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-stalls_panel.png new file mode 100644 index 0000000000..61e09c915c Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-stalls_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-l2-transactions_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-l2-transactions_panel.png new file mode 100644 index 0000000000..51875e516c Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-l2-transactions_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-sol_panel.png new file mode 100644 index 0000000000..5c2485d0d7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-launch-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-launch-stats_panel.png new file mode 100644 index 0000000000..38e4517f33 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-launch-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-runtime-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-runtime-stats_panel.png new file mode 100644 index 0000000000..517d461d31 Binary files /dev/null and b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-runtime-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/index.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/index.md new file mode 100644 index 0000000000..bd01b87354 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/index.md @@ -0,0 +1,22 @@ +# 
Welcome to the [Omniperf](https://github.com/ROCm/omniperf) Documentation! + +```{warning} +This version of the documentation is archived and contains out-of-date information. +See [Omniperf documentation](https://rocm.docs.amd.com/projects/omniperf/en/latest/index.html) for the latest version. +``` + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 + :caption: Table of Contents + + introduction + high_level_design + installation + getting_started + profiling + analysis + performance_model + faq +``` diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/installation.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/installation.md new file mode 100644 index 0000000000..c28654de30 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/installation.md @@ -0,0 +1,267 @@ +# Deployment + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +## Install Omniperf from source + +Omniperf is broken into two installation components: + +1. **Omniperf Client-side (_Required_)** + - Provides core application profiling capability + - Allows collection of performance counters, filtering by hardware block, dispatch, kernel, etc + - CLI based analysis mode + - Stand alone web interface for importing analysis metrics +2. **Omniperf Server-side (_Optional_)** + - Mongo DB backend + Grafana instance + - Packaged in a Docker container for easy setup + +Determine what you need to install based on how you would like to interact with Omniperf. See the decision tree below to help determine what installation is right for you. + +![Omniperf Installation Decision Tree](images/install_decision_tree.png) + +--- + +### Client-side Installation + +Omniperf client-side requires the following basic software dependencies prior to usage: + +* Python (>=3.8) +* CMake (>= 3.19) +* ROCm (>= 5.7.1) + +In addition, Omniperf leverages a number of Python packages that are +documented in the top-level `requirements.txt` file. 
These must be +installed prior to Omniperf configuration. + +```{admonition} Optional packages +If you would like to build Omniperf as a developer, consider these additional requirements: + +| Requirement file | Description | +| --------------------- | -------------------------------------------------------------- | +| requirements-doc.txt | Python packages required to build docs from source | +| requirements-test.txt | Python packages required to run Omniperf's CI suite via PyTest | + +``` + +The recommended procedure for Omniperf usage is to install into a shared file system so that multiple users can access the final installation. The following steps illustrate how to install the necessary python dependencies using [pip](https://packaging.python.org/en/latest/) and Omniperf into a shared location controlled by the `INSTALL_DIR` environment variable. + +```{admonition} Configuration variables +The following installation example leverages several +[CMake](https://cmake.org/cmake/help/latest/) project variables +defined as follows: +| Variable | Description | +| -------------------- | -------------------------------------------------------------------- | +| CMAKE_INSTALL_PREFIX | controls install path for Omniperf files | +| PYTHON_DEPS | provides optional path to resolve Python package dependencies | +| MOD_INSTALL_PATH | provides optional path for separate Omniperf modulefile installation | + +``` + +A typical install will begin by downloading the latest release tarball +available from the +[Releases](https://github.com/ROCm/omniperf/releases) section +of the Omniperf development site. 
From there, untar and descend into +the top-level directory as follows: + +```shell-session +$ tar xfz omniperf-v{__VERSION__}.tar.gz +$ cd omniperf-v{__VERSION__} +``` + +Next, install Python dependencies and complete the Omniperf configuration/install process as follows: + +```shell-session +# define top-level install path +$ export INSTALL_DIR=<your-top-level-install-path> + +# install python deps +$ python3 -m pip install -t ${INSTALL_DIR}/python-libs -r requirements.txt + +# configure Omniperf for shared install +$ mkdir build +$ cd build +$ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/{__VERSION__} \ + -DPYTHON_DEPS=${INSTALL_DIR}/python-libs \ + -DMOD_INSTALL_PATH=${INSTALL_DIR}/modulefiles .. + +# install +$ make install +``` + +```{tip} +You may require `sudo` during the final install step if you +do not have write access to the chosen install path. +``` + + +After completing these steps, a successful top-level installation directory looks as follows: +```shell-session +$ ls $INSTALL_DIR +modulefiles {__VERSION__} python-libs +``` + +#### Execution using modulefiles + +The installation process includes creation of an environment +modulefile for use with [Lmod](https://lmod.readthedocs.io). On +systems that support Lmod, a user can register the Omniperf modulefile +directory and set up their environment for execution of Omniperf as +follows: + + + +```shell-session +$ module use $INSTALL_DIR/modulefiles +$ module load omniperf +$ which omniperf +/opt/apps/omniperf/{__VERSION__}/bin/omniperf + +$ omniperf --version +ROC Profiler: /opt/rocm-5.1.0/bin/rocprof + +omniperf (v{__VERSION__}) +``` + +```{tip} Users relying on an Lmod Python module locally may wish to +customize the resulting Omniperf modulefile post-installation to +include additional module dependencies. +``` + +#### Execution without modulefiles + +To use Omniperf without the companion modulefile, update your `PATH` +settings to enable access to the command-line binary. 
If you installed Python +dependencies in a shared location, update your `PYTHONPATH` config as well: + +```shell-session +export PATH=$INSTALL_DIR/{__VERSION__}/bin:$PATH +export PYTHONPATH=$INSTALL_DIR/python-libs +``` + +#### rocProf + +Omniperf relies on a rocProf binary during the profiling +process. Normally the path to this binary will be detected +automatically, but it can also be overridden by setting the +optional `ROCPROF` environment variable to the path of the binary the user +wishes to use instead. + + + + + +%%% ### Generate Packaging +%%% ```console +%%% cd build +%%% cpack -G STGZ +%%% cpack -G DEB -D CPACK_PACKAGING_INSTALL_PREFIX=/opt/omniperf +%%% cpack -G RPM -D CPACK_PACKAGING_INSTALL_PREFIX=/opt/omniperf +%%% ``` + +--- + +### Server-side Setup + +```{note} +Server-side setup is **not required** to profile or analyze performance data from the CLI. It is provided as an additional mechanism to import performance data for examination within a detailed [Grafana](https://github.com/grafana/grafana) GUI. +``` + +Omniperf server-side requires the following basic software dependencies prior to usage: + +* [Docker Engine](https://docs.docker.com/engine/install/) + +The recommended process for enabling the server-side of Omniperf is to use the provided Docker file to build the Grafana and MongoDB instance. + +Once you have decided which machine you would like to use to host the Grafana and MongoDB instance, please follow the set-up instructions below. + +#### Install MongoDB Utils +Omniperf uses [mongoimport](https://www.mongodb.com/docs/database-tools/mongoimport/) to upload data to Grafana's backend database. 
Install for Ubuntu 20.04 is as follows: + +```bash +$ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb +$ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb +``` +> Installation instructions for alternative distributions can be found [here](https://www.mongodb.com/download-center/database-tools/releases/archive) + +#### Persistent Storage + +The user will also bind MongoDB to a directory on the host OS to create a local backup in case of a crash or reset. In the Docker world, this is known as "creating a persistent volume": + +```bash +$ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/ +$ sudo mkdir -p grafana-storage mongodb +$ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/grafana-storage --opt o=bind grafana-storage +$ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/mongodb --opt o=bind grafana-mongo-db +``` + +#### Build and Launch + +We are now ready to build our Docker file. Navigate to your Omniperf install directory to begin. +```bash +$ cd grafana +$ sudo docker-compose build +$ sudo docker-compose up -d +``` +> TCP ports for Grafana (4000) and MongoDB (27017) in the docker container are mapped to 14000 and 27018, respectively, on the host side. + +```{tip} +In the event that your Grafana or MongoDB instance crashes fatally, you can always restart the server. Just navigate to your install directory and run: +``` + +```bash +$ cd grafana +$ sudo docker-compose down +$ sudo docker-compose up -d +``` + +#### Setup Grafana Instance +Once you have launched your docker container you should be able to reach Grafana at **http://\<host-ip>:14000**. 
The default login credentials for the first-time Grafana setup are: + +- Username: **admin** +- Password: **admin** + +![Grafana Welcome Page](images/grafana_welcome.png) + +#### MongoDB Datasource Configuration + +The MongoDB Datasource must be configured prior to first-time use. Navigate to Grafana's Configuration page (shown below) to add the **Omniperf Data** connection. + +![Omniperf Datasource Config](images/datasource_config.jpg) + +Configure the following fields in the datasource settings: + +- __HTTP URL__: set to `http://localhost:3333` +- __MongoDB URL__: set to `mongodb://temp:temp123@\<host-ip>:27018/admin?authSource=admin` +- __Database Name__: set to `admin` + +After properly configuring these fields click **Save & Test** (as shown below) to make sure your connection is successful. + +> Note: to avoid potential DNS issues, you may need to use the actual IP address of the host node in the MongoDB URL. + +![Datasource Settings](images/datasource_settings.jpg) + +#### Omniperf Dashboard Import + +From *Create* → *Import* (as shown below), upload the dashboard file, `/dashboards/Omniperf_v{__VERSION__}_pub.json`, from the Omniperf tarball. + +Edit both the Dashboard Name and the Unique Identifier (UID) to uniquely identify the dashboard you will use. Click Import to finish the process. + +![Import Dashboard](images/import_dashboard.png) + +#### Using your dashboard + +Once you have imported a dashboard you are ready to begin! Start by browsing available dashboards and selecting the dashboard you have just imported. + +![Opening your dashboard](images/opening_dashboard.png) + +Remember, you will need to upload workload data to the DB backend before analyzing in your Grafana interface. We provide a detailed example of this in our [Analysis section](./analysis.md#grafana-gui-import). + +After a workload has been successfully uploaded, you should be able to select it from the workload dropdown located at the top of your Grafana dashboard. 
+ +![Selecting Grafana workload](images/grafana_workload_selection.png) + +For more information on how to use the Grafana interface for analysis please see the [Grafana section](./analysis.md#grafana-based-gui) in the Analyze Mode tab. diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/introduction.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/introduction.md new file mode 100644 index 0000000000..14575b33af --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/introduction.md @@ -0,0 +1,61 @@ +# Introduction + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +This documentation was created to provide a detailed breakdown of all facets of Omniperf. In addition to a full deployment guide with installation instructions, we also explain the design of the tool and each of its components. If you are new to Omniperf, these chapters can be followed in order to gradually acquaint you with the tool and progressively introduce its more advanced features. + +This project is proudly open source, and we welcome all feedback! For more details on how to contribute, please see our Contribution Guide. + +[Browse Omniperf source code on Github](https://github.com/ROCm/omniperf) + +## What is Omniperf + +Omniperf is a kernel level profiling tool for Machine Learning/HPC workloads running on AMD Instinct (tm) MI accelerators. AMD's Instinct (tm) MI accelerators are Data Center GPUs designed for compute and with some graphics functions disabled or removed. Omniperf is currently built on top of [rocProf](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html) to monitor hardware performance counters. The Omniperf tool primarily targets accelerators in the MI100, MI200, and MI300 families. Development is in progress to support Radeon (tm) RDNA (tm) GPUs. + +## Features + +The Omniperf tool performs profiling based on all available hardware counters for the target accelerator. 
It provides high level performance analysis features including System Speed-of-Light, Hardware block level Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... + +Both command line analysis and GUI analysis are supported. + +Detailed Feature List: + +- MI100 support +- MI200 support +- Standalone GUI Analyzer +- Grafana/MongoDB GUI Analyzer +- Dispatch Filtering +- Kernel Filtering +- GPU ID Filtering +- Baseline Comparison +- Multi-Normalizations +- System Info Panel +- System Speed-of-Light Panel +- Kernel Statistic Panel +- Memory Chart Analysis Panel +- Roofline Analysis Panel (_Supported on MI200 only, Ubuntu 20.04, SLES 15 SP3 or RHEL8_) +- Command Processor (CP) Panel +- Workgroup Manager (SPI) Panel +- Wavefront Launch Panel +- Compute Unit - Instruction Mix Panel +- Compute Unit - Pipeline Panel +- Local Data Share (LDS) Panel +- Instruction Cache Panel +- Scalar L1D Cache Panel +- L1 Address Processing Unit, a.k.a. Texture Addresser (TA) / L1 Backend Data Processing Unit, a.k.a. Texture Data (TD) panel(s) +- Vector L1D Cache Panel +- L2 Cache Panel +- L2 Cache (per-Channel) Panel + +## Compatible SoCs + +| Platform | Status | +| :---------------- | :--------- | +| Vega 20 (MI50/60) | No support | +| MI100 | Supported | +| MI200 | Supported | +| MI300 | Supported | diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/make.bat b/projects/rocprofiler-compute/docs/archive/docs-2.x/make.bat new file mode 100644 index 0000000000..27f573b87a --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/performance_model.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/performance_model.md new file mode 100644 index 0000000000..822ba1a659 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/performance_model.md @@ -0,0 +1,4409 @@ +# AMD Instinct(tm) MI Series Accelerator Performance Model + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 5 +``` + +Omniperf makes available an extensive list of metrics to better understand achieved application performance on AMD Instinct(tm) MI accelerators including Graphics Core Next (GCN) GPUs such as the AMD Instinct MI50, CDNA(tm) accelerators such as the MI100, and CDNA(tm) 2 accelerators such as MI250X/250/210. + +To best utilize this profiling data, it is vital to understand the role of various hardware blocks of AMD Instinct accelerators. This section aims to describe each hardware block on the accelerator as interacted with by a software developer, and give a deeper understanding of the metrics reported therein. Refer to [Profiling with Omniperf by Example](profiling-with-omniperf) for more practical examples and detail on how to use Omniperf to optimize your code. + +(2xxnote)= +```{note} +In this document, we use `MI2XX` to refer to any of the AMD Instinct(tm) MI250X, MI250, and MI210 CDNA2 accelerators interchangeably for situations where the exact product in question is not relevant. 
+For more details on the differences between these accelerators, we refer the reader to the [MI250X](https://www.amd.com/en/products/server-accelerators/instinct-mi250x), [MI250](https://www.amd.com/en/products/server-accelerators/instinct-mi250) and [MI210](https://www.amd.com/en/products/server-accelerators/amd-instinct-mi210) product pages. +``` + + +(CU)= +## Compute Unit (CU) + +The Compute Unit (CU) is responsible for executing a user's kernels on AMD's CDNA(tm) accelerators. All [wavefronts](wavefront) of a [workgroup](workgroup) are scheduled on the same CU. + +![GCN Compute Unit](images/gcn_compute_unit.png) + +The CU consists of several independent pipelines / functional units: + +- The vector arithmetic logic unit (VALU) is composed of multiple Single Instruction Multiple Data (SIMD) vector processors, Vector General Purpose Registers (VGPRs) and instruction buffers. The VALU is responsible for executing much of the computational work on CDNA accelerators, including (but not limited to) floating-point operations (FLOPs), integer operations (IOPs), etc. +- The vector memory (VMEM) unit is responsible for issuing loads, stores and atomic operations that interact with the memory system. +- The Scalar Arithmetic Logic Unit (SALU) is shared by all threads in a [wavefront](wavefront), and is responsible for executing instructions that are known to be uniform across the wavefront at compile-time. The SALU has a memory unit (SMEM) for interacting with memory, but it cannot issue separately from the SALU. +- The Local Data Share (LDS) is an on-CU software-managed scratchpad memory that can be used to efficiently share data between all threads in a [workgroup](workgroup). +- The scheduler is responsible for issuing and decoding instructions for all the [wavefronts](wavefront) on the compute unit. +- The vector L1 data cache (vL1D) is the first level cache local to the compute unit. On current CDNA accelerators, the vL1D is write-through. 
The vL1D caches from multiple compute units are kept coherent with one another through software instructions. +- CDNA accelerators --- i.e., the MI100 and newer --- contain specialized matrix-multiplication accelerator pipelines known as the [Matrix Fused Multiply-Add (MFMA)](mfma). + +For a more thorough description of a compute unit on a CDNA accelerator, see [An introduction to AMD GPU +Programming with HIP](https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf), specifically slides 22-28, and [Layla Mah's: The AMD GCN Architecture - A Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 27. + +The [Pipeline Descriptions section](ERD) details the various execution pipelines (VALU, SALU, LDS, Scheduler, etc.). +The metrics presented by Omniperf for these pipelines are described in the [Pipeline Metrics section](ERM). +Finally, the [vL1D](vL1D) cache and [LDS](LDS) are described in their own sections. + + +(ERD)= +### Pipeline Descriptions + +(valu)= +#### Vector Arithmetic Logic Unit (VALU) + +The vector arithmetic logic unit (VALU) executes vector instructions over an entire wavefront, each [work-item](Workitem) (or, vector-lane) potentially operating on distinct data. +The VALU of a CDNA accelerator or GCN GPU typically consists of: + +- four 16-wide SIMD processors (see [An introduction to AMD GPU +Programming with HIP](https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf) for more details) +- four 64 or 128 KiB VGPR files (yielding a total of 256-512 KiB per CU), see [AGPRs](agprs) for more detail. +- An instruction buffer (per-SIMD) that contains execution slots for up to 8 wavefronts (for 32 total wavefront slots on each CU). +- A vector memory (VMEM) unit which transfers data between VGPRs and memory; each work-item supplies its own memory address and supplies or receives unique data. 
+- CDNA accelerators, such as the MI100 and [MI2XX](2xxnote), contain additional [Matrix Fused Multiply-Add (MFMA) units](https://gpuopen.com/learn/amd-lab-notes/amd-lab-notes-matrix-cores-readme/). + +In order to support branching / conditionals, each wavefront in the VALU has a distinct execution mask which determines which work-items in the wavefront are active for the currently executing instruction. +When executing a VALU instruction, inactive work-items (according to the current execution mask of the wavefront) do not execute the instruction and are treated as no-ops. + +```{note} +On GCN GPUs and the CDNA MI100 accelerator, there are slots for up to 10 wavefronts in the instruction buffer, but generally occupancy is limited by other factors to 32 waves per [Compute Unit](CU). +On the CDNA2 [MI2XX](2xxnote) series accelerators, there are only 8 waveslots per-SIMD. +``` + +(salu)= +#### Scalar Arithmetic Logic Unit (SALU) + +The scalar arithmetic logic unit (SALU) executes instructions that are shared between all work-items in a wavefront. This includes control-flow -- such as if/else conditionals, branches and looping -- pointer arithmetic, loading common values, etc. +The SALU consists of: + +- a scalar processor capable of various arithmetic, conditional, and comparison (etc.) operations. See, e.g., [Chapter 5. Scalar ALU Operations](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf) of the CDNA2 Instruction Set Architecture (ISA) Guide for more detail. +- a 12.5 KiB Scalar General Purpose Register (SGPR) file +- a scalar memory (SMEM) unit which transfers data between SGPRs and memory + +Data loaded by the SMEM can be cached in the [scalar L1 data cache](sL1D), and is typically only used for read-only, uniform accesses such as kernel arguments, or HIP's `__constant__` memory. 
+ +(lds)= +#### Local Data Share (LDS) + +The local data share (LDS, a.k.a., "shared memory") is a fast on-CU scratchpad that can be explicitly managed by software to effectively share data and to coordinate between wavefronts in a workgroup. + +```{figure} images/lds.* +:scale: 150 % +:alt: Performance model of the Local Data Share (LDS) on AMD Instinct(tm) MI accelerators. +:align: center + +Performance model of the Local Data Share (LDS) on AMD Instinct(tm) MI accelerators. +``` + +Above is Omniperf's performance model of the LDS on CDNA accelerators (adapted from [GCN Architecture, by Mike Mantor](https://old.hotchips.org/wp-content/uploads/hc_archives/hc24/HC24-3-ManyCore/HC24.28.315-AMD.GCN.mantor_v1.pdf), slide 20). +The SIMDs in the [VALU](valu) are connected to the LDS in pairs (see above). +Only one SIMD per pair may issue an LDS instruction at a time, but both pairs may issue concurrently. + +On CDNA accelerators, the LDS contains 32 banks and each bank is 4B wide. +The LDS is designed such that each bank can be read from/written to/atomically updated every cycle, for a total throughput of 128B/clock ([GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 40). + +On each of the two ports to the SIMDs, 64B can be sent in each direction per cycle. So, a single wavefront, coming from one of the 2 SIMDs in a pair, can only get back 64B/cycle (16 lanes per cycle). The input port is shared between data and address and this can affect achieved bandwidth for different data sizes. For example, a 64-wide store where each lane is sending a 4B value takes 8 cycles (50% peak bandwidth) while a 64-wide store where each lane is sending a 16B value takes 20 cycles (80% peak bandwidth). + +In addition, the LDS contains conflict-resolution hardware to detect and handle bank conflicts. 
+A bank conflict occurs when two (or more) work-items in a wavefront want to read, write, or atomically update different addresses that map to the same bank in the same cycle. +In this case, the conflict detection hardware will determine a new schedule such that the access is split into multiple cycles with no conflicts in any single cycle. + +When multiple work-items want to read from the same address within a bank, the result can be efficiently broadcast ([GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 41). +Multiple work-items writing to the same address within a bank typically results in undefined behavior in HIP and other languages, as the LDS will write the value from the last work-item as determined by the hardware scheduler ([GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 41). This behavior may be useful in the very specific case of storing a uniform value. + +Relatedly, an address conflict is defined as occurring when two (or more) work-items in a wavefront want to atomically update the same address on the same cycle. +As in a bank-conflict, this may cause additional cycles of work for the LDS operation to complete. + +(branch)= +#### Branch + +The branch unit is responsible for executing jumps and branches to execute control-flow operations. +Note that Branch operations are not used for execution mask updates, but only for “whole wavefront” control-flow changes. + +(scheduler)= +#### Scheduler + +The scheduler is responsible for arbitration and issue of instructions for all the wavefronts currently executing on the CU. 
On every clock cycle, the scheduler: + +- considers waves from one of the SIMD units for execution, selected in a round-robin fashion between the SIMDs in the [compute unit](CU) +- issues up to one instruction per wavefront on the selected SIMD +- issues up to one instruction per each of the instruction categories among the waves on the selected SIMD: + - [VALU](valu) + - [VMEM](valu) operations + - [SALU](salu) / SMEM operations + - [LDS](lds) + - [Branch](branch) operations + +This gives a maximum of five issued Instructions Per Cycle (IPC), per-SIMD, per-CU ([AMD GPU HIP Training](https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf), [GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah)). + +On CDNA accelerators with [MFMA](mfma) instructions, these are issued via the [VALU](valu). Some of them will execute on a separate functional unit and typically allow other [VALU](valu) operations to execute in their shadow (see the [MFMA](mfma) section for more detail). + +```{note} +The IPC model used by Omniperf omits the following two complications for clarity. +First, CDNA accelerators contain other execution units on the CU that are unused for compute applications. +Second, so-called "internal" instructions (see [Layla Mah's GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 29) are not issued to a functional unit, and can technically cause the maximum IPC to _exceed_ 5 instructions per-cycle in special (largely unrealistic) cases. +The latter issue is discussed in more detail in our ['internal' IPC](Internal_ipc) example. +``` + +(mfma)= +#### Matrix Fused Multiply-Add (MFMA) + +CDNA accelerators, such as the MI100 and [MI2XX](2xxnote), contain specialized hardware to accelerate matrix-matrix multiplications, also known as Matrix Fused Multiply-Add (MFMA) operations. 
+The exact operation types and supported formats may vary by accelerator. +The reader is referred to the [AMD matrix cores](https://gpuopen.com/learn/amd-lab-notes/amd-lab-notes-matrix-cores-readme/) blog post on GPUOpen for a general discussion of these hardware units. +In addition, to explore the available MFMA instructions in-depth on various AMD accelerators (including the CDNA line), we recommend the [AMD Matrix Instruction Calculator](https://github.com/RadeonOpenCompute/amd_matrix_instruction_calculator). + +```{code-block} shell-session +:name: matrix_calc_ex +:caption: Partial snapshot of the AMD Matrix Instruction Calculator Tool + +$ ./matrix_calculator.py --architecture cdna2 --instruction v_mfma_f32_4x4x1f32 --detail-instruction +Architecture: CDNA2 +Instruction: V_MFMA_F32_4X4X1F32 + Encoding: VOP3P-MAI + VOP3P Opcode: 0x42 + VOP3P-MAI Opcode: 0x2 + Matrix Dimensions: + M: 4 + N: 4 + K: 1 + blocks: 16 + Execution statistics: + FLOPs: 512 + Execution cycles: 8 + FLOPs/CU/cycle: 256 + Can co-execute with VALU: True + VALU co-execution cycles possible: 4 + Register usage: + GPRs required for A: 1 + GPRs required for B: 1 + GPRs required for C: 4 + GPRs required for D: 4 + GPR alignment requirement: 8 bytes +``` + +For the purposes of Omniperf, the MFMA unit is typically treated as a separate pipeline from the [VALU](valu), as other VALU instructions (along with other execution pipelines such as the SALU) can be issued during a portion of the total duration of an MFMA operation. + +```{note} +The exact details of VALU and MFMA operation co-execution vary by instruction, and can be explored in more detail via the: + - 'Can co-execute with VALU' + - 'VALU co-execution cycles possible' + +fields in the [AMD Matrix Instruction Calculator](https://github.com/RadeonOpenCompute/amd_matrix_instruction_calculator#example-of-querying-instruction-information)'s detailed instruction information. 
+``` + +#### Non-pipeline resources + +In this section, we describe a few resources that are not standalone pipelines but are important for understanding performance optimization on CDNA accelerators. + +(barrier)= +##### Barrier + +Barriers are resources on the compute-unit of a CDNA accelerator that are used to implement synchronization primitives (e.g., HIP's `__syncthreads`). +Barriers are allocated to any workgroup that consists of more than a single wavefront. + +(agprs)= +##### Accumulation vector General-Purpose Registers (AGPRs) + +Accumulation vector General-Purpose Registers, or AGPRs, are special resources that are accessible to a subset of instructions focused on [MFMA](mfma) operations. +These registers allow the [MFMA](mfma) unit to access more than the normal maximum of 256 [architected Vector General-Purpose Registers (i.e., VGPRs)](valu) by having up to 256 in the architected space and up to 256 in the accumulation space. +Traditional VALU instructions can only use VGPRs in the architected space, and data can be moved to/from VGPRs↔AGPRs using specialized instructions (`v_accvgpr_*`). +These data movement instructions may be used by the compiler to implement lower-cost register-spill/fills on architectures with AGPRs. + +AGPRs are not available on all AMD Instinct(tm) accelerators. +GCN GPUs, such as the AMD Instinct(tm) MI50 had a 256 KiB VGPR file. +The AMD Instinct(tm) MI100 (CDNA) has a 2x256 KiB register file, where one half is available as general-purpose VGPRs, and the other half is for matrix math accumulation VGPRs (AGPRs). +The AMD Instinct(tm) [MI2XX](2xxnote) (CDNA2) has a 512 KiB VGPR file per CU, where each wave can dynamically request up to 256 KiB of VGPRs and an additional 256 KiB of AGPRs. +For more detail, the reader is referred to the [following comment](https://github.com/RadeonOpenCompute/ROCm/issues/1689#issuecomment-1553751913). 
+ +(ERM)= +### Pipeline Metrics + +In this section, we describe the metrics available in Omniperf to analyze the pipelines discussed in the [previous section](ERD). + +#### Wavefront + +(Wavefront_launch_stats)= +##### Wavefront Launch Stats + +The wavefront launch stats panel gives general information about the kernel launch: + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Grid Size + - The total number of work-items (a.k.a "threads") launched as a part of the kernel dispatch. In HIP, this is equivalent to the total grid size multiplied by the total workgroup (a.k.a "block") size. + - [Work-items](Workitem) +* - Workgroup Size + - The total number of work-items (a.k.a "threads") in each workgroup (a.k.a "block") launched as part of the kernel dispatch. In HIP, this is equivalent to the total block size. + - [Work-items](Workitem) +* - Total Wavefronts + - The total number of wavefronts launched as part of the kernel dispatch. On AMD Instinct(tm) CDNA accelerators and GCN GPUs, the wavefront size is always 64 work-items. Thus, the total number of wavefronts should be equivalent to the ceiling of Grid Size divided by 64. + - [Wavefronts](Wavefront) +* - Saved Wavefronts + - The total number of wavefronts saved at a context-save, see [cwsr_enable](https://docs.kernel.org/gpu/amdgpu/module-parameters.html?highlight=cwsr). + - [Wavefronts](Wavefront) +* - Restored Wavefronts + - The total number of wavefronts restored from a context-save, see [cwsr_enable](https://docs.kernel.org/gpu/amdgpu/module-parameters.html?highlight=cwsr). + - [Wavefronts](Wavefront) +* - VGPRs + - The number of architected vector general-purpose registers allocated for the kernel, see [VALU](valu). Note: this may not exactly match the number of VGPRs requested by the compiler due to allocation granularity. 
+ - [VGPRs](valu) +* - AGPRs + - The number of accumulation vector general-purpose registers allocated for the kernel, see [AGPRs](agprs). Note: this may not exactly match the number of AGPRs requested by the compiler due to allocation granularity. + - [AGPRs](agprs) +* - SGPRs + - The number of scalar general-purpose registers allocated for the kernel, see [SALU](salu). Note: this may not exactly match the number of SGPRs requested by the compiler due to allocation granularity. + - [SGPRs](salu) +* - LDS Allocation + - The number of bytes of [LDS](lds) memory (a.k.a., "Shared" memory) allocated for this kernel. Note: This may also be larger than what was requested at compile-time due to both allocation granularity and dynamic per-dispatch LDS allocations. + - Bytes per [workgroup](workgroup) +* - Scratch Allocation + - The number of bytes of [scratch-memory](Mspace) requested _per_ work-item for this kernel. Scratch memory is used for stack memory on the accelerator, as well as for register spills/restores. + - Bytes per [work-item](workitem) +``` + +(Wavefront_runtime_stats)= +##### Wavefront Runtime Stats + +The wavefront runtime statistics gives a high-level overview of the execution of wavefronts in a kernel: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - [Kernel Time](KernelTime) + - The total duration of the executed kernel. Note: this should not be directly compared to the wavefront cycles / timings below. + - Nanoseconds +* - [Kernel Cycles](KernelCycles) + - The total duration of the executed kernel in cycles. Note: this should not be directly compared to the wavefront cycles / timings below. + - Cycles +* - Instructions per wavefront + - The average number of instructions (of all types) executed per wavefront. This is averaged over all wavefronts in a kernel dispatch. 
+ - Instructions / wavefront +* - Wave Cycles + - The number of cycles a wavefront in the kernel dispatch spent resident on a compute unit per [normalization-unit](normunit). This is averaged over all wavefronts in a kernel dispatch. Note: this should not be directly compared to the kernel cycles above. + - Cycles per [normalization-unit](normunit) +* - Dependency Wait Cycles + - The number of cycles a wavefront in the kernel dispatch stalled waiting on memory of any kind (e.g., instruction fetch, vector or scalar memory, etc.) per [normalization-unit](normunit). This counter is incremented at every cycle by _all_ wavefronts on a CU stalled at a memory operation. As such, it is most useful to get a sense of how waves were spending their time, rather than identification of a precise limiter because another wave could be actively executing while a wave is stalled. The sum of this metric, Issue Wait Cycles and Active Cycles should be equal to the total Wave Cycles metric. + - Cycles per [normalization-unit](normunit) +* - Issue Wait Cycles + - The number of cycles a wavefront in the kernel dispatch was unable to issue an instruction for any reason (e.g., execution pipe back-pressure, arbitration loss, etc.) per [normalization-unit](normunit). This counter is incremented at every cycle by _all_ wavefronts on a CU unable to issue an instruction. As such, it is most useful to get a sense of how waves were spending their time, rather than identification of a precise limiter because another wave could be actively executing while a wave is issue stalled. The sum of this metric, Dependency Wait Cycles and Active Cycles should be equal to the total Wave Cycles metric. + - Cycles per [normalization-unit](normunit) +* - Active Cycles + - The average number of cycles a wavefront in the kernel dispatch was actively executing instructions per [normalization-unit](normunit). 
This measurement is made on a per-wavefront basis, and may include (e.g.,) cycles that another wavefront spent actively executing (e.g., on another execution unit) or was stalled. As such, it is most useful to get a sense of how waves were spending their time, rather than identification of a precise limiter. The sum of this metric, Issue Wait Cycles and Dependency Wait Cycles should be equal to the total Wave Cycles metric. + - Cycles per [normalization-unit](normunit) +* - Wavefront Occupancy + - The time-averaged number of wavefronts resident on the accelerator over the lifetime of the kernel. Note: this metric may be inaccurate for short-running kernels (<< 1ms). + - Wavefronts +``` + +```{seealso} +As mentioned above, the measurement of kernel cycles and time typically cannot directly be compared to e.g., Wave Cycles. +This is due to two factors: first, the kernel cycles/timings are measured using a counter that is impacted by scheduling overhead; this is particularly noticeable for "short-running" kernels (typically << 1ms) where scheduling overhead forms a significant portion of the overall kernel runtime. +Secondly, the Wave Cycles metric is incremented per-wavefront scheduled to a SIMD every cycle whereas the kernel cycles counter is incremented only once per-cycle when _any_ wavefront is scheduled. +``` + +(Inst_mix)= +#### Instruction Mix + +The instruction mix panel shows a breakdown of the various types of instructions executed by the user's kernel, and which pipelines on the [CU](CU) they were executed on. +In addition, Omniperf reports further information about the breakdown of operation types for the [VALU](valu), vector-memory, and [MFMA](mfma) instructions. + +```{note} +All metrics in this section count _instructions issued_, and _not_ the total number of operations executed. +The values reported by these metrics will not change regardless of the execution mask of the wavefront. 
+We note that even if the execution mask is identically zero (i.e., _no lanes are active_) the instruction will still be counted, as CDNA accelerators still consider these instructions 'issued'; see, e.g., [EXECute Mask, Section 3.3 of the CDNA2 ISA Guide](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf) for more details. +``` + +##### Overall Instruction Mix + +This panel shows the total number of each type of instruction issued to the [various compute pipelines](ERD) on the [CU](CU). +These are: + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - [VALU](valu) Instructions + - The total number of vector arithmetic logic unit (VALU) operations issued. These are the workhorses of the compute-unit, and are used to execute a wide range of instruction types including floating point operations, non-uniform address calculations, transcendental operations, integer operations, shifts, conditional evaluation, etc. + - Instructions +* - VMEM Instructions + - The total number of vector memory operations issued. These include most loads, stores and atomic operations and all accesses to [generic, global, private and texture](Mspace) memory. + - Instructions +* - [LDS](lds) Instructions + - The total number of LDS (a.k.a., "shared memory") operations issued. These include (e.g.,) loads, stores, atomics, and HIP's `__shfl` operations. + - Instructions +* - [MFMA](mfma) Instructions + - The total number of matrix fused multiply-add instructions issued. + - Instructions +* - [SALU](salu) Instructions + - The total number of scalar arithmetic logic unit (SALU) operations issued. Typically these are used for (e.g.,) address calculations, literal constants, and other operations that are _provably_ uniform across a wavefront. Although scalar memory (SMEM) operations are issued by the SALU, they are counted separately in this section. 
+ - Instructions +* - SMEM Instructions + - The total number of scalar memory (SMEM) operations issued. These are typically used for loading kernel arguments, base-pointers and loads from HIP's `__constant__` memory. + - Instructions +* - [Branch](branch) Instructions + - The total number of branch operations issued. These typically consist of jump / branch operations and are used to implement control flow. + - Instructions +``` + +```{note} +Note, as mentioned in the [Branch](branch) section: branch operations are not used for execution mask updates, but only for "whole wavefront" control-flow changes. +``` + +(VALU_Inst_Mix)= +##### VALU Arithmetic Instruction Mix +```{warning} +Not all metrics in this section (e.g., the floating-point instruction breakdowns) are available on CDNA accelerators older than the [MI2XX](2xxnote) series. +``` + +This panel details the various types of vector instructions that were issued to the [VALU](valu). +The metrics in this section do _not_ include [MFMA](mfma) instructions using the same precision, e.g. the "F16-ADD" metric does not include any 16-bit floating point additions executed as part of an MFMA instruction using the same precision. + +```{list-table} +:header-rows: 1 +:widths: 15 65 20 +:class: noscroll-table +* - Metric + - Description + - Unit +* - INT32 + - The total number of instructions operating on 32-bit integer operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - INT64 + - The total number of instructions operating on 64-bit integer operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F16-ADD + - The total number of addition instructions operating on 16-bit floating-point operands issued to the VALU per [normalization-unit](normunit). 
+ - Instructions per [normalization-unit](normunit) +* - F16-MUL + - The total number of multiplication instructions operating on 16-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F16-FMA + - The total number of fused multiply-add instructions operating on 16-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F16-TRANS + - The total number of transcendental instructions (e.g., `sqrt`) operating on 16-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F32-ADD + - The total number of addition instructions operating on 32-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F32-MUL + - The total number of multiplication instructions operating on 32-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F32-FMA + - The total number of fused multiply-add instructions operating on 32-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F32-TRANS + - The total number of transcendental instructions (e.g., `sqrt`) operating on 32-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F64-ADD + - The total number of addition instructions operating on 64-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F64-MUL + - The total number of multiplication instructions operating on 64-bit floating-point operands issued to the VALU per [normalization-unit](normunit). 
+ - Instructions per [normalization-unit](normunit) +* - F64-FMA + - The total number of fused multiply-add instructions operating on 64-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - F64-TRANS + - The total number of transcendental instructions (e.g., `sqrt`) operating on 64-bit floating-point operands issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Conversion + - The total number of type conversion instructions (e.g., converting data to/from F32↔F64) issued to the VALU per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +``` + +For an example of these counters in action, the reader is referred to the [VALU Arithmetic Instruction Mix example](VALU_inst_mix_example). + +##### VMEM Instruction Mix + +This section breaks down the types of vector memory (VMEM) instructions that were issued. +Refer to the [Instruction Counts metrics section](TA_inst) of the address-processor frontend of the vL1D cache for a description of these VMEM instructions. + +(MFMA_Inst_mix)= +##### MFMA Instruction Mix + +```{warning} +The metrics in this section are only available on CDNA2 ([MI2XX](2xxnote)) accelerators and newer. +``` + +This section details the types of Matrix Fused Multiply-Add ([MFMA](mfma)) instructions that were issued. +Note that [MFMA](mfma) instructions are classified by the type of input data they operate on, and _not_ the data-type the result is accumulated to. + +```{list-table} +:header-rows: 1 +:widths: 25 60 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - MFMA-I8 Instructions + - The total number of 8-bit integer [MFMA](mfma) instructions issued per [normalization-unit](normunit). 
+ - Instructions per [normalization-unit](normunit) +* - MFMA-F16 Instructions + - The total number of 16-bit floating point [MFMA](mfma) instructions issued per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - MFMA-BF16 Instructions + - The total number of 16-bit brain floating point [MFMA](mfma) instructions issued per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - MFMA-F32 Instructions + - The total number of 32-bit floating-point [MFMA](mfma) instructions issued per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - MFMA-F64 Instructions + - The total number of 64-bit floating-point [MFMA](mfma) instructions issued per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +``` + +#### Compute Pipeline + +(FLOP_count)= +##### FLOP counting conventions + +Omniperf's conventions for VALU FLOP counting are as follows: + - Addition or Multiplication: 1 operation + - Transcendentals: 1 operation + - Fused Multiply-Add (FMA): 2 operations + +Integer operations (IOPs) do not use this convention. They are counted as a single operation regardless of the instruction type. + +```{note} +Packed operations which operate on multiple operands in the same instruction are counted identically to the underlying instruction type. +For example, the `v_pk_add_f32` instruction on [MI2XX](2xxnote), which performs an add operation on two pairs of aligned 32-bit floating-point operands is counted only as a single addition (i.e., 1 operation). +``` + +As discussed in the [Instruction Mix](Inst_Mix) section, the FLOP/IOP metrics in this section do not take into account the execution mask of the operation, and will report the same value even if the execution mask is identically zero. 
+ +For example, a FMA instruction operating on 32-bit floating-point operands (e.g., `v_fma_f32` on a [MI2XX](2xxnote) accelerator) would be counted as 128 total FLOPs: 2 operations (due to the instruction type) multiplied by 64 operations (because the wavefront is composed of 64 work-items). + +(Compute_SOL)= +##### Compute Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section are currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. +``` + +This section reports the number of floating-point and integer operations executed on the [VALU](valu) and [MFMA](mfma) units in various precisions. +We note that unlike the [VALU instruction mix](VALU_Inst_Mix) and [MFMA instruction mix](MFMA_Inst_mix) sections, the metrics here are reported as FLOPs and IOPs, i.e., the total number of operations executed. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - VALU FLOPs + - The total floating-point operations executed per second on the [VALU](valu). This is also presented as a percent of the peak theoretical FLOPs achievable on the specific accelerator. Note: this does not include any floating-point operations from [MFMA](mfma) instructions. + - GFLOPs +* - VALU IOPs + - The total integer operations executed per second on the [VALU](valu). This is also presented as a percent of the peak theoretical IOPs achievable on the specific accelerator. Note: this does not include any integer operations from [MFMA](mfma) instructions. + - GIOPs +* - MFMA FLOPs (BF16) + - The total number of 16-bit brain floating point [MFMA](mfma) operations executed per second. Note: this does not include any 16-bit brain floating point operations from [VALU](valu) instructions. 
This is also presented as a percent of the peak theoretical BF16 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - MFMA FLOPs (F16) + - The total number of 16-bit floating point [MFMA](mfma) operations executed per second. Note: this does not include any 16-bit floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical F16 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - MFMA FLOPs (F32) + - The total number of 32-bit floating point [MFMA](mfma) operations executed per second. Note: this does not include any 32-bit floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical F32 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - MFMA FLOPs (F64) + - The total number of 64-bit floating point [MFMA](mfma) operations executed per second. Note: this does not include any 64-bit floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical F64 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - MFMA IOPs (INT8) + - The total number of 8-bit integer [MFMA](mfma) operations executed per second. Note: this does not include any 8-bit integer operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical INT8 MFMA operations achievable on the specific accelerator. + - GIOPs +``` + + +(Pipeline_stats)= +##### Pipeline Statistics + +This section reports a number of key performance characteristics of various execution units on the [CU](cu). +The reader is referred to the [Instructions per-cycle and Utilizations](IPC_example) example for a detailed dive into these metrics, and the [scheduler](scheduler) for a high-level overview of execution units and instruction issue. 
+ +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - IPC + - The ratio of the total number of instructions executed on the [CU](cu) over the [total active CU cycles](TotalActiveCUCycles). + - Instructions per-cycle +* - IPC (Issued) + - The ratio of the total number of (non-[internal](Internal_ipc)) instructions issued over the number of cycles where the [scheduler](scheduler) was actively working on issuing instructions. The reader is referred to the [Issued IPC](Issued_ipc) example for further detail. + - Instructions per-cycle +* - SALU Utilization + - Indicates what percent of the kernel's duration the [SALU](salu) was busy executing instructions. Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [SALU](salu) / [SMEM](salu) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - VALU Utilization + - Indicates what percent of the kernel's duration the [VALU](valu) was busy executing instructions. Does not include [VMEM](valu) operations. Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [VALU](valu) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - VMEM Utilization + - Indicates what percent of the kernel's duration the [VMEM](valu) unit was busy executing instructions, including both global/generic and spill/scratch operations (see the [VMEM instruction count metrics](TA_inst) for more detail). Does not include [VALU](valu) operations. Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [VMEM](valu) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - Branch Utilization + - Indicates what percent of the kernel's duration the [Branch](branch) unit was busy executing instructions. 
Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [Branch](branch) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - VALU Active Threads + - Indicates the average level of [divergence](Divergence) within a wavefront over the lifetime of the kernel. The number of work-items that were active in a wavefront during execution of each [VALU](valu) instruction, time-averaged over all VALU instructions run on all wavefronts in the kernel. + - Work-items +* - MFMA Utilization + - Indicates what percent of the kernel's duration the [MFMA](mfma) unit was busy executing instructions. Computed as the ratio of the total number of cycles the [MFMA](mfma) unit was busy over the [total CU cycles](TotalCUCycles). + - Percent +* - MFMA Instruction Cycles + - The average duration of [MFMA](mfma) instructions in this kernel in cycles. Computed as the ratio of the total number of cycles the [MFMA](mfma) unit was busy over the total number of [MFMA](mfma) instructions. Compare to e.g., the [AMD Matrix Instruction Calculator](https://github.com/RadeonOpenCompute/amd_matrix_instruction_calculator). + - Cycles per instruction +* - VMEM Latency + - The average number of round-trip cycles (i.e., from issue to data-return / acknowledgment) required for a VMEM instruction to complete. + - Cycles +* - SMEM Latency + - The average number of round-trip cycles (i.e., from issue to data-return / acknowledgment) required for a SMEM instruction to complete. + - Cycles +``` + +```{note} +The Branch utilization reported in this section also includes time spent in other instruction types (namely: `s_endpgm`) that are _typically_ a very small percentage of the overall kernel execution. This complication is omitted for simplicity, but may result in small amounts of "branch" utilization (<<1\%) for otherwise branch-less kernels. 
+``` + +(FLOPS)= +##### Arithmetic Operations + +This section reports the total number of floating-point and integer operations executed in various precisions. +Unlike the [Compute speed-of-light](Compute_SOL) panel, this section reports both [VALU](valu) and [MFMA](mfma) operations of the same precision (e.g., F32) in the same metric. +Additionally, this panel lets the user control how the data is normalized (i.e., control the [normalization-unit](normunit)), while the speed-of-light panel does not. +For more detail on how operations are counted see the [FLOP counting convention](FLOP_count) section. + +```{warning} +As discussed in the [Instruction Mix](Inst_Mix) section, the metrics in this section do not take into account the execution mask of the operation, and will report the same value even if EXEC is identically zero. +``` + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - FLOPs (Total) + - The total number of floating-point operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit) + - FLOP per [normalization-unit](normunit) +* - IOPs (Total) + - The total number of integer operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit) + - IOP per [normalization-unit](normunit) +* - F16 OPs + - The total number of 16-bit floating-point operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit) + - FLOP per [normalization-unit](normunit) +* - BF16 OPs + - The total number of 16-bit brain floating-point operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit). Note: on current CDNA accelerators, the [VALU](valu) has no native BF16 instructions. 
+ - FLOP per [normalization-unit](normunit) +* - F32 OPs + - The total number of 32-bit floating-point operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit) + - FLOP per [normalization-unit](normunit) +* - F64 OPs + - The total number of 64-bit floating-point operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit) + - FLOP per [normalization-unit](normunit) +* - INT8 OPs + - The total number of 8-bit integer operations executed on either the [VALU](valu) or [MFMA](mfma) units, per [normalization-unit](normunit). Note: on current CDNA accelerators, the [VALU](valu) has no native INT8 instructions. + - IOPs per [normalization-unit](normunit) +``` + +(LDS_metrics)= +### Local Data Share (LDS) + +#### LDS Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section are currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. +``` + +The LDS speed-of-light chart shows a number of key metrics for the [LDS](lds) as a comparison with the peak achievable values of those metrics. +The reader is referred to our previous [LDS](lds) description for a more in-depth view of the hardware. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Utilization + - Indicates what percent of the kernel's duration the [LDS](lds) was actively executing instructions (including, but not limited to, load, store, atomic and HIP's `__shfl` operations). Calculated as the ratio of the total number of cycles LDS was active over the [total CU cycles](TotalCUCycles). + - Percent +* - Access Rate + - Indicates the percentage of SIMDs in the [VALU](valu){sup}`1` actively issuing LDS instructions, averaged over the lifetime of the kernel. 
Calculated as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [LDS](lds) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - Theoretical Bandwidth (% of Peak) + - Indicates the maximum amount of bytes that _could_ have been loaded from/stored to/atomically updated in the LDS in this kernel, as a percent of the peak LDS bandwidth achievable. See the [LDS Bandwidth example](lds_bandwidth) for more detail. + - Percent +* - Bank Conflict Rate + - Indicates the percentage of active LDS cycles that were spent servicing bank conflicts. Calculated as the ratio of LDS cycles spent servicing bank conflicts over the number of LDS cycles that would have been required to move the same amount of data in an uncontended access.{sup}`2` + - Percent +``` + +```{note} +{sup}`1` Here we assume the typical case where the workload evenly distributes LDS operations over all SIMDs in a CU (that is, waves on different SIMDs are executing similar code). +For highly unbalanced workloads, where e.g., one SIMD pair in the CU does not issue LDS instructions at all, this metric is better interpreted as the percentage of SIMDs issuing LDS instructions on [SIMD pairs](lds) that are actively using the LDS, averaged over the lifetime of the kernel. + +{sup}`2` The maximum value of the bank conflict rate is less than 100% (specifically: 96.875%), as the first cycle in the [LDS scheduler](lds) is never considered contended. +``` + +#### Statistics + +The [LDS](lds) statistics panel gives a more detailed view of the hardware: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - LDS Instructions + - The total number of LDS instructions (including, but not limited to, read/write/atomics, and e.g., HIP's `__shfl` instructions) executed per [normalization-unit](normunit). 
+ - Instructions per [normalization-unit](normunit) +* - Theoretical Bandwidth + - Indicates the maximum amount of bytes that could have been loaded from/stored to/atomically updated in the LDS per [normalization-unit](normunit). Does _not_ take into account the execution mask of the wavefront when the instruction was executed (see [LDS Bandwidth](lds_bandwidth) example for more detail). + - Bytes per [normalization-unit](normunit) +* - LDS Latency + - The average number of round-trip cycles (i.e., from issue to data-return / acknowledgment) required for an LDS instruction to complete. + - Cycles +* - Bank Conflicts/Access + - The ratio of the number of cycles spent in the [LDS scheduler](lds) due to bank conflicts (as determined by the conflict resolution hardware) to the base number of cycles that would be spent in the LDS scheduler in a completely uncontended case. This is the unnormalized form of the Bank Conflict Rate. + - Conflicts/Access +* - Index Accesses + - The total number of cycles spent in the [LDS scheduler](lds) over all operations per [normalization-unit](normunit). + - Cycles per [normalization-unit](normunit) +* - Atomic Return Cycles + - The total number of cycles spent on LDS atomics with return per [normalization-unit](normunit). + - Cycles per [normalization-unit](normunit) +* - Bank Conflicts + - The total number of cycles spent in the [LDS scheduler](lds) due to bank conflicts (as determined by the conflict resolution hardware) per [normalization-unit](normunit). + - Cycles per [normalization-unit](normunit) +* - Address Conflicts + - The total number of cycles spent in the [LDS scheduler](lds) due to address conflicts (as determined by the conflict resolution hardware) per [normalization-unit](normunit). + - Cycles per [normalization-unit](normunit) +* - Unaligned Stall + - The total number of cycles spent in the [LDS scheduler](lds) due to stalls from non-dword aligned addresses per [normalization-unit](normunit). 
+ - Cycles per [normalization-unit](normunit) +* - Memory Violations + - The total number of out-of-bounds accesses made to the LDS, per [normalization-unit](normunit). This is unused and expected to be zero in most configurations for modern CDNA accelerators. + - Accesses per [normalization-unit](normunit) +``` + + +(vL1D)= +### Vector L1 Cache (vL1D) + +The vector L1 data (vL1D) cache is local to each [compute unit](CU) on the accelerator, and handles vector memory operations issued by a wavefront. +The vL1D cache consists of several components: + + - an address processing unit, also known as the [texture addresser (TA)](TA), which receives commands (e.g., instructions) and write/atomic data from the [Compute Unit](CU), and coalesces them into fewer requests for the cache to process. + - an address translation unit, also known as the L1 Unified Translation Cache (UTCL1), that translates requests from virtual to physical addresses for lookup in the cache. The translation unit has an L1 translation lookaside buffer (L1TLB) to reduce the cost of repeated translations. + - a Tag RAM that looks up whether a requested cache line is already present in the [cache](TC). + - the result of the Tag RAM lookup is placed in the L1 cache controller for routing to the correct location, e.g., the [L2 Memory Interface](TCP_TCC_Transactions_Detail) for misses or the [Cache RAM](TC) for hits. + - the Cache RAM, also known as the [texture cache (TC)](TC), stores requested data for potential reuse. Data returned from the [L2 cache](L2) is placed into the Cache RAM before going down the [data-return path](TD). + - a backend data processing unit, also known as the [texture data (TD)](TD) that routes data back to the requesting [Compute Unit](CU). + +Together, this complex is known as the vL1D, or Texture Cache per Pipe (TCP). 
+A simplified diagram of the vL1D is presented below: + +```{figure} images/l1perf_model.* +:scale: 150 % +:alt: Performance model of the vL1D Cache on AMD Instinct(tm) MI accelerators. +:align: center + +Performance model of the vL1D Cache on AMD Instinct(tm) MI accelerators. +``` + +(L1_SOL)= +#### vL1D Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section are currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. +``` + +The vL1D's speed-of-light chart shows several key metrics for the vL1D as a comparison with the peak achievable values of those metrics. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Hit Rate + - The ratio of the number of vL1D cache line requests that hit{sup}`1` in vL1D cache over the total number of cache line requests to the [vL1D Cache RAM](TC). + - Percent +* - Bandwidth + - The number of bytes looked up in the vL1D cache as a result of [VMEM](VALU) instructions, as a percent of the peak theoretical bandwidth achievable on the specific accelerator. The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. + - Percent +* - Utilization + - Indicates how busy the [vL1D Cache RAM](TC) was during the kernel execution. The number of cycles where the [vL1D Cache RAM](TC) is actively processing any request divided by the number of cycles where the [vL1D is active](vL1d_activity){sup}`2` + - Percent +* - Coalescing + - Indicates how well memory instructions were coalesced by the [address processing unit](TA), ranging from uncoalesced (25\%) to fully coalesced (100\%). 
The average number of [thread-requests](ThreadRequests) generated per instruction divided by the ideal number of [thread-requests](ThreadRequests) per instruction. + - Percent +``` + +(vL1d_activity)= +```{note} +{sup}`1` The vL1D cache on AMD Instinct(tm) MI CDNA accelerators uses a "hit-on-miss" approach to reporting cache hits. +That is, if while satisfying a miss, another request comes in that would hit on the same pending cache line, the subsequent request will be counted as a 'hit'. +Therefore, it is also important to consider the Access Latency metric in the [Cache access metrics](TCP_cache_access_metrics) section when evaluating the vL1D hit rate. + +{sup}`2` Omniperf considers the vL1D to be active when any part of the vL1D (excluding the [address-processor](TA) and [data-return](TD) units) are active, e.g., performing a translation, waiting for data, accessing the Tag or Cache RAMs, etc. +``` +(TA)= +#### Address Processing Unit or Texture Addresser (TA) + +The [vL1D](vL1D)'s address processing unit receives vector memory instructions (commands) along with write/atomic data from a [Compute Unit](CU) and is responsible for coalescing these into requests for lookup in the [vL1D RAM](TC). +The address processor passes information about the commands (coalescing state, destination SIMD, etc.) to the [data processing unit](TD) for use after the requested data has been retrieved. + +Omniperf reports several metrics to indicate performance bottlenecks in the address processing unit, which are broken down into a few categories: + + - Busy / stall metrics + - Instruction counts + - Spill / Stack metrics + +##### Busy / Stall metrics + +When executing vector memory instructions, the compute unit must send an address (and in the case of writes/atomics, data) to the address processing unit. 
When the frontend cannot accept any more addresses, it must backpressure the wave-issue logic for the VMEM pipe and prevent the issue of a vector memory instruction until a previously issued memory operation has been processed. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Busy + - Percent of the [total CU cycles](TotalCUCycles) the address processor was busy + - Percent +* - Address Stall + - Percent of the [total CU cycles](TotalCUCycles) the address processor was stalled from sending address requests further into the vL1D pipeline + - Percent +* - Data Stall + - Percent of the [total CU cycles](TotalCUCycles) the address processor was stalled from sending write/atomic data further into the vL1D pipeline + - Percent +* - Data-Processor → Address Stall + - Percent of [total CU cycles](TotalCUCycles) the address processor was stalled waiting to send command data to the [data processor](TD) + - Percent +``` + + +(TA_inst)= +##### Instruction counts + +The address processor also counts instruction types to give the user information on what sorts of memory instructions were executed by the kernel. +These are broken down into a few major categories: + +```{list-table} +:header-rows: 1 +:widths: 20 20 60 +:class: noscroll-table +* - Memory type + - Usage + - Description +* - Global + - Global memory + - Global memory can be seen by all threads from a process. This includes the local accelerator's DRAM, remote accelerator's DRAM, and the host's DRAM. +* - Generic + - Dynamic address spaces + - Generic memory, a.k.a. "flat" memory, is used when the compiler cannot statically prove that a pointer is to memory in one or the other address spaces. The pointer could dynamically point into global, local, constant, or private memory. +* - Private Memory + - Register spills / Stack memory + - Private memory, a.k.a. 
"scratch" memory, is only visible to a particular [work-item](workitem) in a particular [workgroup](workgroup). On AMD Instinct(tm) MI accelerators, private memory is used to implement both register spills and stack memory accesses. +``` + +The address processor counts these instruction types as follows: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table + +* - Type + - Description + - Unit +* - Global/Generic + - The total number of global & generic memory instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Global/Generic Read + - The total number of global & generic memory read instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Global/Generic Write + - The total number of global & generic memory write instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Global/Generic Atomic + - The total number of global & generic memory atomic (with and without return) instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Spill/Stack + - The total number of spill/stack memory instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Spill/Stack Read + - The total number of spill/stack memory read instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). 
+ - Instructions per [normalization-unit](normunit) +* - Spill/Stack Write + - The total number of spill/stack memory write instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Spill/Stack Atomic + - The total number of spill/stack memory atomic (with and without return) instructions executed on all [compute units](CU) on the accelerator, per [normalization-unit](normunit). Typically unused, as these memory operations are generally used to implement thread-local storage. + - Instructions per [normalization-unit](normunit) +``` + +```{note} +The above is a simplified model specifically for the HIP programming language that does not consider, e.g., inline assembly usage, constant memory usage or texture memory. + +These categories correspond to: + - Global/Generic: global and flat memory operations, that are used for Global and Generic memory access. + - Spill/Stack: buffer instructions which are used on the MI50, MI100, and [MI2XX](2xxnote) accelerators for register spills / stack memory. + +These concepts are described in more detail in the [memory space section](Mspace) below, while generic memory access is explored in the [generic memory benchmark](flatmembench) section. +``` + +##### Spill/Stack metrics + +Finally, the address processing unit contains a separate coalescing stage for spill/stack memory, and thus reports: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Spill/Stack Total Cycles + - The number of cycles the address processing unit spent working on spill/stack instructions, per [normalization-unit](normunit). + - Cycles per [normalization-unit](normunit) +* - Spill/Stack Coalesced Read Cycles + - The number of cycles the address processing unit spent working on coalesced spill/stack read instructions, per [normalization-unit](normunit). 
+ - Cycles per [normalization-unit](normunit) +* - Spill/Stack Coalesced Write Cycles + - The number of cycles the address processing unit spent working on coalesced spill/stack write instructions, per [normalization-unit](normunit) + - Cycles per [normalization-unit](normunit) +``` + +(UTCL1)= +#### L1 Unified Translation Cache (UTCL1) + +After a vector memory instruction has been processed/coalesced by the address processing unit of the vL1D, it must be translated from a virtual to physical address. +This process is handled by the L1 Unified Translation Cache (UTCL1). +This cache contains a L1 Translation Lookaside Buffer (TLB) which stores recently translated addresses to reduce the cost of subsequent re-translations. + +Omniperf reports the following L1 TLB metrics: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Requests + - The number of translation requests made to the UTCL1 per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Hits + - The number of translation requests that hit in the UTCL1, and could be reused, per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Hit Ratio + - The ratio of the number of translation requests that hit in the UTCL1 divided by the total number of translation requests made to the UTCL1. + - Percent +* - Translation Misses + - The total number of translation requests that missed in the UTCL1 due to translation not being present in the cache, per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Permission Misses + - The total number of translation requests that missed in the UTCL1 due to a permission error, per [normalization-unit](normunit). This is unused and expected to be zero in most configurations for modern CDNA accelerators. 
+ - Requests per [normalization-unit](normunit) +``` +```{note} +On current CDNA accelerators, such as the [MI2XX](2xxnote), the UTCL1 does _not_ count hit-on-miss requests. +``` + +(TC)= +#### Vector L1 Cache RAM (TC) + +After coalescing in the [address processing unit](TA) of the vL1D, and address translation in the [L1 TLB](UTCL1), the request proceeds to the Cache RAM stage of the pipeline. +Incoming requests are looked up in the cache RAMs using parts of the physical address as a tag. +Hits will be returned through the [data-return path](TD), while misses will be routed out to the [L2 Cache](L2) for servicing. + +The metrics tracked by the vL1D RAM include: + + - Stall metrics + - Cache access metrics + - vL1D-L2 transaction detail metrics + +(TCP_cache_stall_metrics)= +##### vL1D cache stall metrics + +The vL1D also reports where it is stalled in the pipeline, which may indicate performance limiters of the cache. +A stall in the pipeline may result in backpressuring earlier parts of the pipeline, e.g., a stall on L2 requests may backpressure the wave-issue logic of the [VMEM](VALU) pipe and prevent it from issuing more vector memory instructions until the vL1D's outstanding requests are completed. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Stalled on L2 Data + - The ratio of the number of cycles where the vL1D is stalled waiting for requested data to return from the [L2 cache](L2) divided by the number of cycles where the [vL1D is active](vL1d_activity). + - Percent +* - Stalled on L2 Requests + - The ratio of the number of cycles where the vL1D is stalled waiting to issue a request for data to the [L2 cache](L2) divided by the number of cycles where the [vL1D is active](vL1d_activity). 
+ - Percent +* - Tag RAM Stall (Read/Write/Atomic) + - The ratio of the number of cycles where the vL1D is stalled due to Read/Write/Atomic requests with conflicting tags being looked up concurrently, divided by the number of cycles where the [vL1D is active](vL1d_activity). + - Percent +``` + +(TCP_cache_access_metrics)= +##### vL1D cache access metrics + +The vL1D cache access metrics broadly indicate the type of requests incoming from the [cache frontend](TA), the number of requests that were serviced by the vL1D, and the number & type of outgoing requests to the [L2 cache](L2). In addition, this section includes the approximate latencies of accesses to the cache itself, along with latencies of read/write memory operations to the [L2 cache](L2). + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Total Requests + - The total number of incoming requests from the [address processing unit](TA) after coalescing. + - Requests +* - Total read/write/atomic requests + - The total number of incoming read/write/atomic requests from the [address processing unit](TA) after coalescing per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Cache Bandwidth + - The number of bytes looked up in the vL1D cache as a result of [VMEM](VALU) instructions per [normalization-unit](normunit). The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. + - Bytes per [normalization-unit](normunit) +* - Cache Hit Rate + - The ratio of the number of vL1D cache line requests that hit in vL1D cache over the total number of cache line requests to the [vL1D Cache RAM](TC). + - Percent +* - Cache Accesses + - The total number of cache line lookups in the vL1D. 
+ - Cache lines +* - Cache Hits + - The number of cache accesses minus the number of outgoing requests to the [L2 cache](L2), i.e., the number of cache line requests serviced by the [vL1D Cache RAM](TC) per [normalization-unit](normunit). + - Cache lines per [normalization-unit](normunit) +* - Invalidations + - The number of times the vL1D was issued a write-back invalidate command during the kernel's execution per [normalization-unit](normunit). This may be triggered by, e.g., the `buffer_wbinvl1` instruction. + - Invalidations per [normalization-unit](normunit) +* - L1-L2 Bandwidth + - The number of bytes transferred across the vL1D-L2 interface as a result of [VMEM](VALU) instructions, per [normalization-unit](normunit). The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. + - Bytes per [normalization-unit](normunit) +* - L1-L2 Reads + - The number of read requests for a vL1D cache line that were not satisfied by the vL1D and must be retrieved from the [L2 Cache](L2) per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - L1-L2 Writes + - The number of post-coalescing write requests that are sent through the vL1D to the [L2 cache](L2), per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - L1-L2 Atomics + - The number of atomic requests that are sent through the vL1D to the [L2 cache](L2), per [normalization-unit](normunit). This includes requests for atomics with, and without return. + - Requests per [normalization-unit](normunit) +* - L1 Access Latency + - The average number of cycles that a vL1D cache line request spent in the vL1D cache pipeline. 
+ - Cycles +* - L1-L2 Read Access Latency + - The average number of cycles that the vL1D cache took to issue and receive read requests from the [L2 Cache](L2). This number also includes requests for atomics with return values. + - Cycles +* - L1-L2 Write Access Latency + - The average number of cycles that the vL1D cache took to issue and receive acknowledgement of a write request to the [L2 Cache](L2). This number also includes requests for atomics without return values. + - Cycles +``` + +```{note} +All cache accesses in vL1D are for a single cache line's worth of data. +The size of a cache line may vary, however on current AMD Instinct(tm) MI CDNA accelerators and GCN GPUs the L1 cache line size is 64B. +``` + +(TCP_TCC_Transactions_Detail)= +##### vL1D - L2 Transaction Detail + +This section provides a more granular look at the types of requests made to the [L2 cache](L2). +These are broken down by the operation type (read / write / atomic, with, or without return), and the [memory type](Mtype). +For more detail, the reader is referred to the [Memory Types](Mtype) section. + + +(TD)= +#### Vector L1 Data-Return Path or Texture Data (TD) + +The data-return path of the vL1D cache, also known as the Texture Data (TD) unit, is responsible for routing data returned from the [vL1D cache RAM](TC) back to a wavefront on a SIMD. +As described in the [vL1D cache front-end](TA) section, the data-return path is passed information about the space requirements and routing for data requests from the [VALU](valu). +When data is returned from the [vL1D cache RAM](TC), it is matched to this previously stored request data, and returned to the appropriate SIMD. 
+ +Omniperf reports the following vL1D data-return path metrics: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Data-return Busy + - Percent of the [total CU cycles](TotalCUCycles) the data-return unit was busy processing or waiting on data to return to the [CU](CU). + - Percent +* - Cache RAM → Data-return Stall + - Percent of the [total CU cycles](TotalCUCycles) the data-return unit was stalled on data to be returned from the [vL1D Cache RAM](TC). + - Percent +* - Workgroup manager → Data-return Stall + - Percent of the [total CU cycles](TotalCUCycles) the data-return unit was stalled by the [workgroup manager](SPI) due to initialization of registers as a part of launching new workgroups. + - Percent +* - Coalescable Instructions + - The number of instructions submitted to the [data-return unit](TD) by the [address-processor](TA) that were found to be coalescable, per [normalization-unit](normunit). + - Instructions per [normalization-unit](normunit) +* - Read Instructions + - The number of read instructions submitted to the [data-return unit](TD) by the [address-processor](TA) summed over all [compute units](CU) on the accelerator, per [normalization-unit](normunit). This is expected to be the sum of global/generic and spill/stack reads in the [address processor](TA_inst). + - Instructions per [normalization-unit](normunit) +* - Write Instructions + - The number of store instructions submitted to the [data-return unit](TD) by the [address-processor](TA) summed over all [compute units](CU) on the accelerator, per [normalization-unit](normunit). This is expected to be the sum of global/generic and spill/stack stores counted by the [vL1D cache-frontend](TA_inst). 
+ - Instructions per [normalization-unit](normunit) +* - Atomic Instructions + - The number of atomic instructions submitted to the [data-return unit](TD) by the [address-processor](TA) summed over all [compute units](CU) on the accelerator, per [normalization-unit](normunit). This is expected to be the sum of global/generic and spill/stack atomics in the [address processor](TA_inst). + - Instructions per [normalization-unit](normunit) +``` + +(L2)= +## L2 Cache (TCC) + +The L2 cache is the coherence point for current AMD Instinct(tm) MI GCN GPUs and CDNA accelerators, and is shared by all [compute units](CU) on the device. +Besides serving requests from the [vector L1 data caches](vL1D), the L2 cache also is responsible for servicing requests from the [L1 instruction caches](L1I), the [scalar L1 data caches](sL1D) and the [command-processor](CP). +The L2 cache is composed of a number of distinct channels (32 on MI100/[MI2XX](2xxnote) series CDNA accelerators at 256B address interleaving) which can largely operate independently. +Mapping of incoming requests to a specific L2 channel is determined by a hashing mechanism that attempts to evenly distribute requests across the L2 channels. +Requests that miss in the L2 cache are passed out to [Infinity Fabric(tm)](l2fabric) to be routed to the appropriate memory location. + +The L2 cache metrics reported by Omniperf are broken down into four categories: + + - L2 Speed-of-Light + - L2 Cache Accesses + - L2-Fabric Transactions + - L2-Fabric Stalls + + +(L2SoL)= +### L2 Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section is currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. 
+``` + +The L2 cache's speed-of-light table contains a few key metrics about the performance of the L2 cache, aggregated over all the L2 channels, as a comparison with the peak achievable values of those metrics: + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Utilization + - The ratio of the [number of cycles an L2 channel was active, summed over all L2 channels on the accelerator](TotalActiveL2Cycles) over the [total L2 cycles](TotalL2Cycles). + - Percent +* - Bandwidth + - The number of bytes looked up in the L2 cache, as a percent of the peak theoretical bandwidth achievable on the specific accelerator. The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. + - Percent +* - Hit Rate + - The ratio of the number of L2 cache line requests that hit in the L2 cache over the total number of incoming cache line requests to the L2 cache. + - Percent +* - L2-Fabric Read BW + - The number of bytes read by the L2 over the [Infinity Fabric(tm) interface](l2fabric) per unit time. + - GB/s +* - L2-Fabric Write and Atomic BW + - The number of bytes sent by the L2 over the [Infinity Fabric(tm) interface](l2fabric) by write and atomic operations per unit time. + - GB/s +``` + +```{note} +The L2 cache on AMD Instinct(tm) MI CDNA accelerators uses a "hit-on-miss" approach to reporting cache hits. +That is, if while satisfying a miss, another request comes in that would hit on the same pending cache line, the subsequent request will be counted as a 'hit'. +Therefore, it is also important to consider the latency metric in the [L2-Fabric](l2fabric) section when evaluating the L2 hit rate. 
+``` + +(L2_cache_metrics)= +### L2 Cache Accesses + +This section details the incoming requests to the L2 cache from the [vL1D](vL1D) and other clients (e.g., the [sL1D](sL1D) and [L1I](L1I) caches). + +```{list-table} +:header-rows: 1 +:widths: 13 70 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Bandwidth + - The number of bytes looked up in the L2 cache, per [normalization-unit](normunit). The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. + - Bytes per [normalization-unit](normunit) +* - Requests + - The total number of incoming requests to the L2 from all clients for all request types, per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Read Requests + - The total number of read requests to the L2 from all clients. + - Requests per [normalization-unit](normunit) +* - Write Requests + - The total number of write requests to the L2 from all clients. + - Requests per [normalization-unit](normunit) +* - Atomic Requests + - The total number of atomic requests (with and without return) to the L2 from all clients. + - Requests per [normalization-unit](normunit) +* - Streaming Requests + - The total number of incoming requests to the L2 that are marked as 'streaming'. The exact meaning of this may differ depending on the targeted accelerator, however on an [MI2XX](2xxnote) this corresponds to [non-temporal load or stores](https://clang.llvm.org/docs/LanguageExtensions.html#non-temporal-load-store-builtins). The L2 cache attempts to evict 'streaming' requests before normal requests when the L2 is at capacity. + - Requests per [normalization-unit](normunit) +* - Probe Requests + - The number of coherence probe requests made to the L2 cache from outside the accelerator. 
On an [MI2XX](2xxnote), probe requests may be generated by e.g., writes to [fine-grained device](MType) memory or by writes to [coarse-grained](MType) device memory. + - Requests per [normalization-unit](normunit) +* - Hit Rate + - The ratio of the number of L2 cache line requests that hit in the L2 cache over the total number of incoming cache line requests to the L2 cache. + - Percent +* - Hits + - The total number of requests to the L2 from all clients that hit in the cache. As noted in the [speed-of-light](L2SoL) section, this includes hit-on-miss requests. + - Requests per [normalization-unit](normunit) +* - Misses + - The total number of requests to the L2 from all clients that miss in the cache. As noted in the [speed-of-light](L2SoL) section, these do not include hit-on-miss requests. + - Requests per [normalization-unit](normunit) +* - Writebacks + - The total number of L2 cache lines written back to memory for any reason. Write-backs may occur due to e.g., user-code (e.g., HIP kernel calls to `__threadfence_system`, or atomic built-ins), by the [command-processor](CP)'s memory acquire/release fences, or for other internal hardware reasons. + - Cache lines per [normalization-unit](normunit) +* - Writebacks (Internal) + - The total number of L2 cache lines written back to memory for internal hardware reasons, per [normalization-unit](normunit). + - Cache lines per [normalization-unit](normunit) +* - Writebacks (vL1D Req) + - The total number of L2 cache lines written back to memory due to requests initiated by the [vL1D cache](vL1D), per [normalization-unit](normunit). + - Cache lines per [normalization-unit](normunit) +* - Evictions (Normal) + - The total number of L2 cache lines evicted from the cache due to capacity limits, per [normalization-unit](normunit). 
+ - Cache lines per [normalization-unit](normunit) +* - Evictions (vL1D Req) + - The total number of L2 cache lines evicted from the cache due to invalidation requests initiated by the [vL1D cache](vL1D), per [normalization-unit](normunit). + - Cache lines per [normalization-unit](normunit) +* - Non-hardware-Coherent Requests + - The total number of requests to the L2 to Not-hardware-Coherent (NC) memory allocations, per [normalization-unit](normunit). See the [Memory Types section](Mtype) for more detail. + - Requests per [normalization-unit](normunit) +* - Uncached Requests + - The total number of requests to the L2 that go to uncached (UC) memory allocations. See the [Memory Types section](Mtype) for more detail. + - Requests per [normalization-unit](normunit) +* - Coherently Cached Requests + - The total number of requests to the L2 that go to coherently cachable (CC) memory allocations. See the [Memory Types section](Mtype) for more detail. + - Requests per [normalization-unit](normunit) +* - Read/Write Coherent Requests + - The total number of requests to the L2 that go to Read-Write coherent memory (RW) allocations. See the [Memory Types section](Mtype) for more detail. + - Requests per [normalization-unit](normunit) +``` + +```{note} +All requests to the L2 are for a single cache line's worth of data. +The size of a cache line may vary depending on the accelerator, however on an AMD Instinct(tm) CDNA2 [MI2XX](2xxnote) accelerator, it is 128B, while on an MI100, it is 64B. +``` + +(l2fabric)= +### L2-Fabric transactions + +Requests/data that miss in the L2 must be routed to memory in order to service them. +The backing memory for a request may be local to this accelerator (i.e., in the local high-bandwidth memory), in a remote accelerator's memory, or even in the CPU's memory. +Infinity Fabric(tm) is responsible for routing these memory requests/data to the correct location and returning any fetched data to the L2 cache. 
+The [following section](L2_req_flow) describes the flow of these requests through Infinity Fabric(tm) in more detail, as described by Omniperf metrics, while [later sections](L2_req_metrics) give detailed definitions of individual metrics. + +(L2_req_flow)= +#### Request flow + +Below is a diagram that illustrates how L2↔Fabric requests are reported by Omniperf: + + +```{figure} images/fabric.png +:alt: L2↔Fabric transaction flow on AMD Instinct(tm) MI accelerators. +:align: center +:name: fabric-fig + +L2↔Fabric transaction flow on AMD Instinct(tm) MI accelerators. +``` + +Requests from the L2 Cache are broken down into two major categories, read requests and write requests (at this granularity, atomic requests are treated as writes). + +From there, these requests can additionally be subdivided in a number of ways. +First, these requests may be sent across Infinity Fabric(tm) as different transaction sizes, 32B or 64B on current CDNA accelerators. + +```{note} +On current CDNA accelerators, the 32B read request path is expected to be unused (hence: is disconnected in the flow diagram). +``` + +In addition, the read and write requests can be further categorized as: + - uncached read/write requests, e.g., for accesses to [fine-grained memory](Mtype) + - atomic requests, e.g., for atomic updates to [fine-grained memory](Mtype) + - HBM read/write requests OR remote read/write requests, i.e., for requests to the accelerator's local HBM OR requests to a remote accelerator's HBM / the CPU's DRAM. + +These classifications are not necessarily _exclusive_, for example, a write request can be classified as an atomic request to the accelerator's local HBM, and an uncached write request. +The request-flow diagram marks _exclusive_ classifications as a splitting of the flow, while _non-exclusive_ requests do not split the flow line. 
+For example, a request is either a 32B Write Request OR a 64B Write request, as the flow splits at this point: +```{figure} images/split.* +:scale: 50 % +:alt: Request flow splitting +:align: center +:name: split-request-flow-fig + +Splitting request flow +``` +However, continuing along, the same request might be an Atomic request and an Uncached Write request, as reflected by a non-split flow: +```{figure} images/nosplit.* +:scale: 50 % +:alt: Request flow splitting +:align: center +:name: nosplit-request-flow-fig + +Non-splitting request flow +``` + +Finally, we note that [uncached](Mtype) read requests (e.g., to [fine-grained memory](Mtype)) are handled specially on CDNA accelerators, as indicated in the request flow diagram. +These are expected to be counted as a 64B Read Request, and _if_ they are requests to uncached memory (denoted by the dashed line), they will also be counted as _two_ uncached read requests (i.e., the request is split): + +```{figure} images/uncached.* +:scale: 50 % +:alt: Uncached read-request splitting +:align: center +:name: uncached-read-request-flow-fig + +Uncached read-request splitting. +``` + +(L2_req_metrics)= +#### Metrics + + +The following metrics are reported for the L2-Fabric interface: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - L2-Fabric Read Bandwidth + - The total number of bytes read by the L2 cache from Infinity Fabric(tm) per [normalization-unit](normunit). + - Bytes per [normalization-unit](normunit) +* - HBM Read Traffic + - The percent of read requests generated by the L2 cache that are routed to the accelerator's local high-bandwidth memory (HBM). This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric Read bandwidth directed to the local HBM. 
+ - Percent +* - Remote Read Traffic + - The percent of read requests generated by the L2 cache that are routed to any memory location other than the accelerator's local high-bandwidth memory (HBM) --- e.g., the CPU's DRAM, a remote accelerator's HBM, etc. This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric Read bandwidth directed to a remote location. + - Percent +* - Uncached Read Traffic + - The percent of read requests generated by the L2 cache that are reading from an [uncached memory allocation](Mtype). Note, as described in the [request-flow](L2_req_flow) section, a single 64B read request is typically counted as two uncached read requests, hence it is possible for the Uncached Read Traffic to reach up to 200% of the total number of read requests. This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric read bandwidth directed to an uncached memory location. + - Percent +* - L2-Fabric Write and Atomic Bandwidth + - The total number of bytes written by the L2 over Infinity Fabric(tm) by write and atomic operations per [normalization-unit](normunit). Note that on current CDNA accelerators, such as the [MI2XX](2xxnote), requests are only considered 'atomic' by Infinity Fabric(tm) if they are targeted at non-write-cachable memory, e.g., [fine-grained memory](Mtype) allocations or [uncached memory](Mtype) allocations on the [MI2XX](2xxnote). + - Bytes per [normalization-unit](normunit) +* - HBM Write and Atomic Traffic + - The percent of write and atomic requests generated by the L2 cache that are routed to the accelerator's local high-bandwidth memory (HBM). 
This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric Write and Atomic bandwidth directed to the local HBM. Note that on current CDNA accelerators, such as the [MI2XX](2xxnote), requests are only considered 'atomic' by Infinity Fabric(tm) if they are targeted at [fine-grained memory](Mtype) allocations or [uncached memory](Mtype) allocations. + - Percent +* - Remote Write and Atomic Traffic + - The percent of write and atomic requests generated by the L2 cache that are routed to any memory location other than the accelerator's local high-bandwidth memory (HBM) --- e.g., the CPU's DRAM, a remote accelerator's HBM, etc. This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric Write and Atomic bandwidth directed to a remote location. Note that on current CDNA accelerators, such as the [MI2XX](2xxnote), requests are only considered 'atomic' by Infinity Fabric(tm) if they are targeted at non-write-cachable memory, e.g., [fine-grained memory](Mtype) allocations or [uncached memory](Mtype) allocations on the [MI2XX](2xxnote). + - Percent +* - Atomic Traffic + - The percent of write requests generated by the L2 cache that are atomic requests to _any_ memory location. This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric Write and Atomic bandwidth that is due to use of atomics. Note that on current CDNA accelerators, such as the [MI2XX](2xxnote), requests are only considered 'atomic' by Infinity Fabric(tm) if they are targeted at [fine-grained memory](Mtype) allocations or [uncached memory](Mtype) allocations. 
+ - Percent +* - Uncached Write and Atomic Traffic + - The percent of write and atomic requests generated by the L2 cache that are targeting [uncached memory allocations](Mtype). This breakdown does not consider the _size_ of the request (i.e., 32B and 64B requests are both counted as a single request), so this metric only _approximates_ the percent of the L2-Fabric Write and Atomic bandwidth directed to uncached memory allocations. + - Percent +* - Read Latency + - The time-averaged number of cycles read requests spent in Infinity Fabric(tm) before data was returned to the L2. + - Cycles +* - Write Latency + - The time-averaged number of cycles write requests spent in Infinity Fabric(tm) before a completion acknowledgement was returned to the L2. + - Cycles +* - Atomic Latency + - The time-averaged number of cycles atomic requests spent in Infinity Fabric(tm) before a completion acknowledgement (atomic without return value) or data (atomic with return value) was returned to the L2. + - Cycles +* - Read Stall + - The ratio of the total number of cycles the L2-Fabric interface was stalled on a read request to any destination (local HBM, remote PCIe(r) connected accelerator / CPU, or remote Infinity Fabric(tm) connected accelerator{sup}`1` / CPU) over the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +* - Write Stall + - The ratio of the total number of cycles the L2-Fabric interface was stalled on a write or atomic request to any destination (local HBM, remote accelerator / CPU, PCIe(r) connected accelerator / CPU, or remote Infinity Fabric(tm) connected accelerator{sup}`1` / CPU) over the [total active L2 cycles](TotalActiveL2Cycles). 
+ - Percent +``` + +(L2_req_metric_details)= +#### Detailed Transaction Metrics + +The following metrics are available in the detailed L2-Fabric transaction breakdown table: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - 32B Read Requests + - The total number of L2 requests to Infinity Fabric(tm) to read 32B of data from any memory location, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. Typically unused on CDNA accelerators. + - Requests per [normalization-unit](normunit) +* - Uncached Read Requests + - The total number of L2 requests to Infinity Fabric(tm) to read [uncached data](Mtype) from any memory location, per [normalization-unit](normunit). 64B requests for uncached data are counted as two 32B uncached data requests. See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - 64B Read Requests + - The total number of L2 requests to Infinity Fabric(tm) to read 64B of data from any memory location, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - HBM Read Requests + - The total number of L2 requests to Infinity Fabric(tm) to read 32B or 64B of data from the accelerator's local HBM, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - Remote Read Requests + - The total number of L2 requests to Infinity Fabric(tm) to read 32B or 64B of data from any source other than the accelerator's local HBM, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - 32B Write and Atomic Requests + - The total number of L2 requests to Infinity Fabric(tm) to write or atomically update 32B of data to any memory location, per [normalization-unit](normunit). 
See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - Uncached Write and Atomic Requests + - The total number of L2 requests to Infinity Fabric(tm) to write or atomically update 32B or 64B of [uncached data](Mtype), per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - 64B Write and Atomic Requests + - The total number of L2 requests to Infinity Fabric(tm) to write or atomically update 64B of data in any memory location, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - HBM Write and Atomic Requests + - The total number of L2 requests to Infinity Fabric(tm) to write or atomically update 32B or 64B of data in the accelerator's local HBM, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - Remote Write and Atomic Requests + - The total number of L2 requests to Infinity Fabric(tm) to write or atomically update 32B or 64B of data in any memory location other than the accelerator's local HBM, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. + - Requests per [normalization-unit](normunit) +* - Atomic Requests + - The total number of L2 requests to Infinity Fabric(tm) to atomically update 32B or 64B of data in any memory location, per [normalization-unit](normunit). See [request-flow](L2_req_flow) for more detail. Note that on current CDNA accelerators, such as the [MI2XX](2xxnote), requests are only considered 'atomic' by Infinity Fabric(tm) if they are targeted at non-write-cachable memory, e.g., [fine-grained memory](Mtype) allocations or [uncached memory](Mtype) allocations on the [MI2XX](2xxnote). 
+ - Requests per [normalization-unit](normunit) +``` + +### L2-Fabric Interface Stalls + +When the interface between the L2 cache and Infinity Fabric(tm) becomes backed up by requests, it may stall preventing the L2 from issuing additional requests to Infinity Fabric(tm) until prior requests complete. +This section gives a breakdown of what types of requests in a kernel caused a stall (e.g., read vs write), and to which locations (e.g., to the accelerator's local memory, or to remote accelerators/CPUs). + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Read - PCIe(r) Stall + - The number of cycles the L2-Fabric interface was stalled on read requests to remote PCIe(r) connected accelerators{sup}`1` or CPUs as a percent of the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +* - Read - Infinity Fabric(tm) Stall + - The number of cycles the L2-Fabric interface was stalled on read requests to remote Infinity Fabric(tm) connected accelerators{sup}`1` or CPUs as a percent of the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +* - Read - HBM Stall + - The number of cycles the L2-Fabric interface was stalled on read requests to the accelerator's local HBM as a percent of the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +* - Write - PCIe(r) Stall + - The number of cycles the L2-Fabric interface was stalled on write or atomic requests to remote PCIe(r) connected accelerators{sup}`1` or CPUs as a percent of the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +* - Write - Infinity Fabric(tm) Stall + - The number of cycles the L2-Fabric interface was stalled on write or atomic requests to remote Infinity Fabric(tm) connected accelerators{sup}`1` or CPUs as a percent of the [total active L2 cycles](TotalActiveL2Cycles). 
+ - Percent +* - Write - HBM Stall + - The number of cycles the L2-Fabric interface was stalled on write or atomic requests to the accelerator's local HBM as a percent of the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +* - Write - Credit Starvation + - The number of cycles the L2-Fabric interface was stalled on write or atomic requests to any memory location because too many write/atomic requests were currently in flight, as a percent of the [total active L2 cycles](TotalActiveL2Cycles). + - Percent +``` + +```{note} +{sup}`1` In addition to being used for on-accelerator data-traffic, AMD [Infinity Fabric](https://www.amd.com/en/technologies/infinity-architecture)(tm) technology can be used to connect multiple accelerators to achieve advanced peer-to-peer connectivity and enhanced bandwidths over traditional PCIe(r) connections. +Some AMD Instinct(tm) MI accelerators, e.g., the MI250X, [feature coherent CPU↔accelerator connections built using AMD Infinity Fabric(tm)](https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf) +``` + +```{warning} +On current CDNA accelerators and GCN GPUs, these L2↔Fabric stalls can be undercounted in some circumstances. +``` + +(SE)= +## Shader Engine (SE) + +The [CUs](CU) on a CDNA accelerator are grouped together into a higher-level organizational unit called a Shader Engine (SE): + +```{figure} images/selayout.png +:alt: Example of CU-grouping into shader-engines on AMD Instinct(tm) MI accelerators. +:align: center +:name: selayout-fig + +Example of CU-grouping into shader-engines on AMD Instinct(tm) MI accelerators. +``` + +The number of CUs on a SE varies from chip-to-chip (see, for example [AMD GPU HIP Training](https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf), slide 20). +In addition, newer accelerators such as the AMD Instinct(tm) MI 250X have 8 SEs per accelerator. 
+ +For the purposes of Omniperf, we consider resources that are shared between multiple CUs on a single SE as part of the SE's metrics. +These include: + - the [scalar L1 data cache](sL1D) + - the [L1 instruction cache](L1I) + - the [workgroup manager](SPI) + +(sL1D)= +### Scalar L1 Data Cache (sL1D) + +The Scalar L1 Data cache (sL1D) can cache data accessed from scalar load instructions (and scalar store instructions on architectures where they exist) from wavefronts in the [CUs](CU). +The sL1D is shared between multiple CUs ([GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 36) --- the exact number of CUs depends on the architecture in question (3 CUs in GCN GPUs and MI100, 2 CUs in [MI2XX](2xxnote)) --- and is backed by the [L2](L2) cache. + +In typical usage, the data in the sL1D is comprised of (e.g.,): + - Kernel arguments, e.g., pointers, [non-populated](https://llvm.org/docs/AMDGPUUsage.html#amdgpu-amdhsa-sgpr-register-set-up-order-table) grid/block dimensions, etc. + - HIP's `__constant__` memory, when accessed in a provably uniform{sup}`1` manner + - Other memory, when accessed in a provably uniform manner, *and* the backing memory is provably constant{sup}`1` + +```{note} +{sup}`1` +The scalar data cache is used when the compiler emits scalar loads to access data. +This requires that the data be _provably_ uniformly accessed (i.e., the compiler can verify that all work-items in a wavefront access the same data), _and_ that the data can be proven to be read-only (e.g., HIP's `__constant__` memory, or properly `__restrict__`'ed pointers to avoid write-aliasing). +Access of e.g., `__constant__` memory is not guaranteed to go through the sL1D if, e.g., the wavefront loads a non-uniform value. 
+``` + +(sL1D_SOL)= +#### Scalar L1D Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section are currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. +``` + +The Scalar L1D speed-of-light chart shows some key metrics of the sL1D cache as a comparison with the peak achievable values of those metrics: + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Bandwidth + - The number of bytes looked up in the sL1D cache, as a percent of the peak theoretical bandwidth. Calculated as the ratio of sL1D requests over the [total sL1D cycles](TotalSL1DCycles). + - Percent +* - Cache Hit Rate + - The percent of sL1D requests that hit{sup}`1` on a previously loaded line in the cache. Calculated as the ratio of the number of sL1D requests that hit over the number of all sL1D requests. + - Percent +* - sL1D-L2 BW + - The number of bytes requested by the sL1D from the L2 cache, as a percent of the peak theoretical sL1D → L2 cache bandwidth. Calculated as the ratio of the total number of requests from the sL1D to the L2 cache over the [total sL1D-L2 interface cycles](TotalSL1DCycles). + - Percent +``` + +```{note} +{sup}`1` Unlike the [vL1D](vL1D) and [L2](L2) caches, the sL1D cache on AMD Instinct(tm) MI CDNA accelerators does _not_ use "hit-on-miss" approach to reporting cache hits. +That is, if while satisfying a miss, another request comes in that would hit on the same pending cache line, the subsequent request will be counted as a 'duplicated miss' (see below). +``` + +#### Scalar L1D Cache Accesses + +This panel gives more detail on the types of accesses made to the sL1D, and the hit/miss statistics. 
+ +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Requests + - The total number of requests, of any size or type, made to the sL1D per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Hits + - The total number of sL1D requests that hit on a previously loaded cache line, per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Misses - Non Duplicated + - The total number of sL1D requests that missed on a cache line that *was not* already pending due to another request, per [normalization-unit](normunit). See note in [speed-of-light section](sL1D_SOL) for more detail. + - Requests per [normalization-unit](normunit) +* - Misses - Duplicated + - The total number of sL1D requests that missed on a cache line that *was* already pending due to another request, per [normalization-unit](normunit). See note in [speed-of-light section](sL1D_SOL) for more detail. + - Requests per [normalization-unit](normunit) +* - Cache Hit Rate + - Indicates the percent of sL1D requests that hit on a previously loaded line in the cache. The ratio of the number of sL1D requests that hit{sup}`1` over the number of all sL1D requests. + - Percent +* - Read Requests (Total) + - The total number of sL1D read requests of any size, per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Atomic Requests + - The total number of sL1D atomic requests of any size, per [normalization-unit](normunit). Typically unused on CDNA accelerators. + - Requests per [normalization-unit](normunit) +* - Read Requests (1 DWord) + - The total number of sL1D read requests made for a single dword of data (4B), per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Read Requests (2 DWord) + - The total number of sL1D read requests made for two dwords of data (8B), per [normalization-unit](normunit).
+ - Requests per [normalization-unit](normunit) +* - Read Requests (4 DWord) + - The total number of sL1D read requests made for four dwords of data (16B), per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Read Requests (8 DWord) + - The total number of sL1D read requests made for eight dwords of data (32B), per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Read Requests (16 DWord) + - The total number of sL1D read requests made for sixteen dwords of data (64B), per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +``` + +```{note} +{sup}`1` Unlike the [vL1D](vL1D) and [L2](L2) caches, the sL1D cache on AMD Instinct(tm) MI CDNA accelerators does _not_ use a "hit-on-miss" approach to reporting cache hits. +That is, if while satisfying a miss, another request comes in that would hit on the same pending cache line, the subsequent request will be counted as a 'duplicated miss' (see the "Misses - Duplicated" metric above). +``` + +#### sL1D ↔ L2 Interface + +This panel gives more detail on the data requested across the sL1D↔[L2](L2) interface. + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - sL1D-L2 BW + - The total number of bytes read from/written to/atomically updated across the sL1D↔[L2](L2) interface, per [normalization-unit](normunit). Note that sL1D writes and atomics are typically unused on current CDNA accelerators, so in the majority of cases this can be interpreted as an sL1D→L2 read bandwidth. + - Bytes per [normalization-unit](normunit) +* - Read Requests + - The total number of read requests from sL1D to the [L2](L2), per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Write Requests + - The total number of write requests from sL1D to the [L2](L2), per [normalization-unit](normunit). Typically unused on current CDNA accelerators.
+ - Requests per [normalization-unit](normunit) +* - Atomic Requests + - The total number of atomic requests from sL1D to the [L2](L2), per [normalization-unit](normunit). Typically unused on current CDNA accelerators. + - Requests per [normalization-unit](normunit) +* - Stall Cycles + - The total number of cycles the sL1D↔[L2](L2) interface was stalled, per [normalization-unit](normunit). + - Cycles per [normalization-unit](normunit) +``` + +(L1I)= +### L1 Instruction Cache (L1I) + +As with the [sL1D](sL1D), the L1 Instruction (L1I) cache is shared between multiple CUs on a shader-engine, where the precise number of CUs sharing a L1I depends on the architecture in question ([GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 36) and is backed by the [L2](L2) cache. +Unlike the sL1D, the instruction cache is read-only. + +(L1I_SOL)= +#### L1I Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section are currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. +``` + +The L1 Instruction Cache speed-of-light chart shows some key metrics of the L1I cache as a comparison with the peak achievable values of those metrics: + +```{list-table} +:header-rows: 1 +:widths: 15 70 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Bandwidth + - The number of bytes looked up in the L1I cache, as a percent of the peak theoretical bandwidth. Calculated as the ratio of L1I requests over the [total L1I cycles](TotalL1ICycles). + - Percent +* - Cache Hit Rate + - The percent of L1I requests that hit on a previously loaded line the cache. Calculated as the ratio of the number of L1I requests that hit{sup}`1` over the number of all L1I requests. + - Percent +* - L1I-L2 BW + - The percent of the peak theoretical L1I → L2 cache request bandwidth achieved. 
Calculated as the ratio of the total number of requests from the L1I to the L2 cache over the [total L1I-L2 interface cycles](TotalL1ICycles). + - Percent +* - Instruction Fetch Latency + - The average number of cycles spent to fetch instructions to a [CU](cu). + - Cycles +``` + +```{note} +{sup}`1` Unlike the [vL1D](vL1D) and [L2](L2) caches, the L1I cache on AMD Instinct(tm) MI CDNA accelerators does _not_ use a "hit-on-miss" approach to reporting cache hits. +That is, if while satisfying a miss, another request comes in that would hit on the same pending cache line, the subsequent request will be counted as a 'duplicated miss' (see below). +``` + +#### L1I Cache Accesses + +This panel gives more detail on the hit/miss statistics of the L1I: + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Requests + - The total number of requests made to the L1I per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Hits + - The total number of L1I requests that hit on a previously loaded cache line, per [normalization-unit](normunit). + - Requests per [normalization-unit](normunit) +* - Misses - Non Duplicated + - The total number of L1I requests that missed on a cache line that *was not* already pending due to another request, per [normalization-unit](normunit). See note in [speed-of-light section](L1I_SOL) for more detail. + - Requests per [normalization-unit](normunit) +* - Misses - Duplicated + - The total number of L1I requests that missed on a cache line that *was* already pending due to another request, per [normalization-unit](normunit). See note in [speed-of-light section](L1I_SOL) for more detail. + - Requests per [normalization-unit](normunit) +* - Cache Hit Rate + - The percent of L1I requests that hit{sup}`1` on a previously loaded line in the cache. Calculated as the ratio of the number of L1I requests that hit over the number of all L1I requests.
+ - Percent +``` + +```{note} +{sup}`1`Unlike the [vL1D](vL1D) and [L2](L2) caches, the L1I cache on AMD Instinct(tm) MI CDNA accelerators does _not_ use "hit-on-miss" approach to reporting cache hits. +That is, if while satisfying a miss, another request comes in that would hit on the same pending cache line, the subsequent request will be counted as a 'duplicated miss' (see below). +``` + +#### L1I - L2 Interface + +This panel gives more detail on the data requested across the L1I-[L2](L2) interface. + +```{list-table} +:header-rows: 1 +:widths: 18 65 17 +:class: noscroll-table +* - Metric + - Description + - Unit +* - L1I-L2 BW + - The total number of bytes read across the L1I-[L2](L2) interface, per [normalization-unit](normunit). + - Bytes per [normalization-unit](normunit) +``` + +(SPI)= +### Workgroup manager (SPI) + +The workgroup manager (SPI) is the bridge between the [command processor](CP) and the [compute units](CU). +After the [command processor](cp) processes a kernel dispatch, it will then pass the dispatch off to the workgroup manager, which then schedules [workgroups](workgroup) onto the [compute units](CU). +As workgroups complete execution and resources become available, the workgroup manager will schedule new workgroups onto [compute units](CU). 
+The workgroup manager's metrics therefore are focused on reporting, e.g.: + + - Utilizations of various parts of the accelerator that the workgroup manager interacts with (and the workgroup manager itself) + - How many workgroups were dispatched, their size, and how many resources they used + - Percent of scheduler opportunities (cycles) where workgroups failed to dispatch, and + - Percent of scheduler opportunities (cycles) where workgroups failed to dispatch due to lack of a specific resource on the CUs (e.g., too many VGPRs allocated) + +This gives the user an idea of why the workgroup manager couldn't schedule more wavefronts onto the device, and is most useful for workloads that the user suspects to be scheduling/launch-rate limited. + +As discussed in the [command processor](cp) description, the command processor on AMD Instinct(tm) MI architectures contains four hardware scheduler-pipes, each with eight software threads ([“Vega10” - Mantor](https://old.hotchips.org/wp-content/uploads/hc_archives/hc29/HC29.21-Monday-Pub/HC29.21.10-GPU-Gaming-Pub/HC29.21.120-Radeon-Vega10-Mantor-AMD-f1.pdf), slide 19). +Each scheduler-pipe can issue a kernel dispatch to the workgroup manager to schedule concurrently. +Therefore, some workgroup manager metrics are presented relative to the utilization of these scheduler-pipes (e.g., whether all four are issuing concurrently). + +```{note} +Current versions of the profiling libraries underlying Omniperf attempt to serialize concurrent kernels running on the accelerator, as the performance counters on the device are global (i.e., shared between concurrent kernels). +This means that these scheduler-pipe utilization metrics are expected to reach e.g., a maximum of one pipe active, i.e., only 25\%. +``` + +#### Workgroup Manager Utilizations + +This section describes the utilization of the workgroup manager, and the hardware components it interacts with. 
+ +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Accelerator Utilization + - The percent of cycles in the kernel where the accelerator was actively doing any work. + - Percent +* - Scheduler-Pipe Utilization + - The percent of [total scheduler-pipe cycles](TotalPipeCycles) in the kernel where the scheduler-pipes were actively doing any work. Note: this value is expected to range between 0-25%, see note in [workgroup-manager](SPI) description. + - Percent +* - Workgroup Manager Utilization + - The percent of cycles in the kernel where the Workgroup Manager was actively doing any work. + - Percent +* - Shader Engine Utilization + - The percent of [total shader-engine cycles](TotalSECycles) in the kernel where any CU in a shader-engine was actively doing any work, normalized over all shader-engines. Low values (e.g., << 100%) indicate that the accelerator was not fully saturated by the kernel, or a potential load-imbalance issue. + - Percent +* - SIMD Utilization + - The percent of [total SIMD cycles](TotalSIMDCycles) in the kernel where any [SIMD](VALU) on a CU was actively doing any work, summed over all CUs. Low values (e.g., << 100%) indicate that the accelerator was not fully saturated by the kernel, or a potential load-imbalance issue. + - Percent +* - Dispatched Workgroups + - The total number of workgroups forming this kernel launch. + - Workgroups +* - Dispatched Wavefronts + - The total number of wavefronts, summed over all workgroups, forming this kernel launch. + - Wavefronts +* - VGPR Writes + - The average number of cycles spent initializing [VGPRs](valu) at wave creation. + - Cycles/wave +* - SGPR Writes + - The average number of cycles spent initializing [SGPRs](salu) at wave creation. 
+ - Cycles/wave +``` + +#### Workgroup Manager - Resource Allocation + +This panel gives more detail on how workgroups/wavefronts were scheduled onto compute units, and what occupancy limiters they hit (if any). +When analyzing these metrics, the user should also take into account their achieved occupancy (i.e., [Wavefront occupancy](Wavefront_runtime_stats)). +A kernel may be occupancy limited by e.g., LDS usage, but may still achieve high occupancy levels such that improving occupancy further may not improve performance. +See the [Workgroup Manager - Occupancy Limiters](Occupancy_example) example for more details. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - Not-scheduled Rate (Workgroup Manager) + - The percent of [total scheduler-pipe cycles](TotalPipeCycles) in the kernel where a workgroup could not be scheduled to a [CU](CU) due to a bottleneck within the workgroup manager rather than a lack of a [CU](CU)/[SIMD](VALU) with sufficient resources. Note: this value is expected to range between 0-25%, see note in [workgroup-manager](SPI) description. + - Percent +* - Not-scheduled Rate (Scheduler-Pipe) + - The percent of [total scheduler-pipe cycles](TotalPipeCycles) in the kernel where a workgroup could not be scheduled to a [CU](CU) due to a bottleneck within the scheduler-pipes rather than a lack of a [CU](CU)/[SIMD](VALU) with sufficient resources. Note: this value is expected to range between 0-25%, see note in [workgroup-manager](SPI) description. + - Percent +* - Scheduler-Pipe Stall Rate + - The percent of [total scheduler-pipe cycles](TotalPipeCycles) in the kernel where a workgroup could not be scheduled to a [CU](CU) due to occupancy limitations (i.e., a lack of a [CU](CU)/[SIMD](VALU) with sufficient resources). Note: this value is expected to range between 0-25%, see note in [workgroup-manager](SPI) description. 
+ - Percent +* - Scratch Stall Rate + - The percent of [total shader-engine cycles](TotalSECycles) in the kernel where a workgroup could not be scheduled to a [CU](CU) due to lack of [private (a.k.a., scratch) memory](Mtype) slots. While this can reach up to 100\%, we note that the actual occupancy limitations on a kernel using private memory are typically quite small (e.g., <1\% of the total number of waves that can be scheduled to an accelerator). + - Percent +* - Insufficient SIMD Waveslots + - The percent of [total SIMD cycles](TotalSIMDCycles) in the kernel where a workgroup could not be scheduled to a [SIMD](valu) due to lack of available [waveslots](valu). + - Percent +* - Insufficient SIMD VGPRs + - The percent of [total SIMD cycles](TotalSIMDCycles) in the kernel where a workgroup could not be scheduled to a [SIMD](valu) due to lack of available [VGPRs](valu). + - Percent +* - Insufficient SIMD SGPRs + - The percent of [total SIMD cycles](TotalSIMDCycles) in the kernel where a workgroup could not be scheduled to a [SIMD](valu) due to lack of available [SGPRs](salu). + - Percent +* - Insufficient CU LDS + - The percent of [total CU cycles](TotalCUCycles) in the kernel where a workgroup could not be scheduled to a [CU](cu) due to lack of available [LDS](lds). + - Percent +* - Insufficient CU Barriers + - The percent of [total CU cycles](TotalCUCycles) in the kernel where a workgroup could not be scheduled to a [CU](cu) due to lack of available [barriers](barrier). + - Percent +* - Reached CU Workgroup Limit + - The percent of [total CU cycles](TotalCUCycles) in the kernel where a workgroup could not be scheduled to a [CU](cu) due to limits within the workgroup manager. This is expected to always be zero on CDNA2 or newer accelerators (and small for previous accelerators).
+ - Percent +* - Reached CU Wavefront Limit + - The percent of [total CU cycles](TotalCUCycles) in the kernel where a wavefront could not be scheduled to a [CU](cu) due to limits within the workgroup manager. This is expected to always be zero on CDNA2 or newer accelerators (and small for previous accelerators). + - Percent +``` + +(CP)= +## Command Processor (CP) + +The command processor -- a.k.a., the CP -- is responsible for interacting with the AMDGPU Kernel Driver (a.k.a., the Linux Kernel) on the CPU and for interacting with user-space HSA clients when they submit commands to HSA queues. +Basic tasks of the CP include reading commands (e.g., corresponding to a kernel launch) out of [HSA Queues](http://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf) (Sec. 2.5), scheduling work to subsequent parts of the scheduler pipeline, and marking kernels complete for synchronization events on the host. + +The command processor is composed of two sub-components: + + - Fetcher (CPF): Fetches commands out of memory to hand them over to the CPC for processing + - Packet Processor (CPC): The micro-controller running the command processing firmware that decodes the fetched commands, and (for kernels) passes them to the [workgroup manager](SPI) for scheduling + +Before scheduling work to the accelerator, the command-processor can first acquire a memory fence to ensure system consistency [(Sec 2.6.4)](http://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf). +After the work is complete, the command-processor can apply a memory-release fence. +Depending on the AMD CDNA accelerator in question, either of these operations _may_ initiate a cache write-back or invalidation. + +Analyzing command processor performance is most interesting for kernels that the user suspects to be scheduling/launch-rate limited.
+The command processor's metrics therefore are focused on reporting, e.g.: + + - Utilization of the fetcher + - Utilization of the packet processor, and decoding processing packets + - Fetch/processing stalls + +### Command Processor Fetcher (CPF) Metrics + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - CPF Utilization + - Percent of total cycles where the CPF was busy actively doing any work. The ratio of CPF busy cycles over total cycles counted by the CPF. + - Percent +* - CPF Stall + - Percent of CPF busy cycles where the CPF was stalled for any reason. + - Percent +* - CPF-L2 Utilization + - Percent of total cycles counted by the CPF-[L2](L2) interface where the CPF-L2 interface was active doing any work. The ratio of CPF-L2 busy cycles over total cycles counted by the CPF-L2. + - Percent +* - CPF-L2 Stall + - Percent of CPF-L2 busy cycles where the CPF-[L2](L2) interface was stalled for any reason. + - Percent +* - CPF-UTCL1 Stall + - Percent of CPF busy cycles where the CPF was stalled by address translation. + - Percent +``` + +### Command Processor Packet Processor (CPC) Metrics + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - CPC Utilization + - Percent of total cycles where the CPC was busy actively doing any work. The ratio of CPC busy cycles over total cycles counted by the CPC. + - Percent +* - CPC Stall + - Percent of CPC busy cycles where the CPC was stalled for any reason. + - Percent +* - CPC Packet Decoding Utilization + - Percent of CPC busy cycles spent decoding commands for processing. + - Percent +* - CPC-Workgroup Manager Utilization + - Percent of CPC busy cycles spent dispatching workgroups to the [Workgroup Manager](SPI). + - Percent +* - CPC-L2 Utilization + - Percent of total cycles counted by the CPC-[L2](L2) interface where the CPC-L2 interface was active doing any work. 
+ - Percent +* - CPC-UTCL1 Stall + - Percent of CPC busy cycles where the CPC was stalled by address translation. + - Percent +* - CPC-UTCL2 Utilization + - Percent of total cycles counted by the CPC's L2 address translation interface where the CPC was busy doing address translation work. + - Percent +``` + +## System Speed-of-Light + +```{warning} +The theoretical maximum throughput for some metrics in this section are currently computed with the maximum achievable clock frequency, as reported by `rocminfo`, for an accelerator. This may not be realistic for all workloads. + +In addition, not all metrics (e.g., FLOP counters) are available on all AMD Instinct(tm) MI accelerators. +For more detail on how operations are counted, see the [FLOP counting convention](FLOP_count) section. +``` + +Finally, the system speed-of-light summarizes some of the key metrics from various sections of Omniperf's profiling report. + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Metric + - Description + - Unit +* - [VALU](valu) FLOPs + - The total floating-point operations executed per second on the [VALU](valu). This is also presented as a percent of the peak theoretical FLOPs achievable on the specific accelerator. Note: this does not include any floating-point operations from [MFMA](mfma) instructions. + - GFLOPs +* - [VALU](valu) IOPs + - The total integer operations executed per second on the [VALU](valu). This is also presented as a percent of the peak theoretical IOPs achievable on the specific accelerator. Note: this does not include any integer operations from [MFMA](mfma) instructions. + - GIOPs +* - [MFMA](mfma) FLOPs (BF16) + - The total number of 16-bit brain floating point [MFMA](mfma) operations executed per second. Note: this does not include any 16-bit brain floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical BF16 MFMA operations achievable on the specific accelerator. 
+ - GFLOPs +* - [MFMA](mfma) FLOPs (F16) + - The total number of 16-bit floating point [MFMA](mfma) operations executed per second. Note: this does not include any 16-bit floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical F16 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - [MFMA](mfma) FLOPs (F32) + - The total number of 32-bit floating point [MFMA](mfma) operations executed per second. Note: this does not include any 32-bit floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical F32 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - [MFMA](mfma) FLOPs (F64) + - The total number of 64-bit floating point [MFMA](mfma) operations executed per second. Note: this does not include any 64-bit floating point operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical F64 MFMA operations achievable on the specific accelerator. + - GFLOPs +* - [MFMA](mfma) IOPs (INT8) + - The total number of 8-bit integer [MFMA](mfma) operations executed per second. Note: this does not include any 8-bit integer operations from [VALU](valu) instructions. This is also presented as a percent of the peak theoretical INT8 MFMA operations achievable on the specific accelerator. + - GIOPs +* - [SALU](salu) Utilization + - Indicates what percent of the kernel's duration the [SALU](salu) was busy executing instructions. Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [SALU](salu) / [SMEM](salu) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - [VALU](valu) Utilization + - Indicates what percent of the kernel's duration the [VALU](valu) was busy executing instructions. Does not include [VMEM](valu) operations. 
Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [VALU](valu) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - [MFMA](mfma) Utilization + - Indicates what percent of the kernel's duration the [MFMA](mfma) unit was busy executing instructions. Computed as the ratio of the total number of cycles the [MFMA](mfma) was busy over the [total CU cycles](TotalCUCycles). + - Percent +* - [VMEM](valu) Utilization + - Indicates what percent of the kernel's duration the [VMEM](valu) unit was busy executing instructions, including both global/generic and spill/scratch operations (see the [VMEM instruction count metrics](TA_inst) for more detail). Does not include [VALU](valu) operations. Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [VMEM](valu) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - [Branch](branch) Utilization + - Indicates what percent of the kernel's duration the [Branch](branch) unit was busy executing instructions. Computed as the ratio of the total number of cycles spent by the [scheduler](scheduler) issuing [Branch](branch) instructions over the [total CU cycles](TotalCUCycles). + - Percent +* - [VALU](valu) Active Threads + - Indicates the average level of [divergence](Divergence) within a wavefront over the lifetime of the kernel. The number of work-items that were active in a wavefront during execution of each [VALU](valu) instruction, time-averaged over all VALU instructions run on all wavefronts in the kernel. + - Work-items +* - IPC + - The ratio of the total number of instructions executed on the [CU](cu) over the [total active CU cycles](TotalActiveCUCycles). This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator. 
+ - Instructions per-cycle +* - Wavefront Occupancy + - The time-averaged number of wavefronts resident on the accelerator over the lifetime of the kernel. Note: this metric may be inaccurate for short-running kernels (<< 1ms). This is also presented as a percent of the peak theoretical occupancy achievable on the specific accelerator. + - Wavefronts +* - [LDS](lds) Theoretical Bandwidth + - Indicates the maximum number of bytes that could have been loaded from/stored to/atomically updated in the LDS per unit time (see [LDS Bandwidth](lds_bandwidth) example for more detail). This is also presented as a percent of the peak theoretical LDS bandwidth achievable on the specific accelerator. + - GB/s +* - [LDS](lds) Bank Conflicts/Access + - The ratio of the number of cycles spent in the [LDS scheduler](lds) due to bank conflicts (as determined by the conflict resolution hardware) to the base number of cycles that would be spent in the LDS scheduler in a completely uncontended case. This is also presented in normalized form (i.e., the Bank Conflict Rate). + - Conflicts/Access +* - [vL1D](vL1D) Cache Hit Rate + - The ratio of the number of vL1D cache line requests that hit in vL1D cache over the total number of cache line requests to the [vL1D Cache RAM](TC). + - Percent +* - [vL1D](vL1D) Cache Bandwidth + - The number of bytes looked up in the vL1D cache as a result of [VMEM](VALU) instructions per unit time. The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator.
+ - GB/s +* - [L2](L2) Cache Hit Rate + - The ratio of the number of L2 cache line requests that hit in the L2 cache over the total number of incoming cache line requests to the L2 cache. + - Percent +* - [L2](L2) Cache Bandwidth + - The number of bytes looked up in the L2 cache per unit time. The number of bytes is calculated as the number of cache lines requested multiplied by the cache line size. This value does not consider partial requests, so e.g., if only a single value is requested in a cache line, the data movement will still be counted as a full cache line. This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator. + - GB/s +* - [L2](L2)-Fabric Read BW + - The number of bytes read by the L2 over the [Infinity Fabric(tm) interface](l2fabric) per unit time. This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator. + - GB/s +* - [L2](L2)-Fabric Write and Atomic BW + - The number of bytes sent by the L2 over the [Infinity Fabric(tm) interface](l2fabric) by write and atomic operations per unit time. This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator. + - GB/s +* - [L2](L2)-Fabric Read Latency + - The time-averaged number of cycles read requests spent in Infinity Fabric(tm) before data was returned to the L2. + - Cycles +* - [L2](L2)-Fabric Write Latency + - The time-averaged number of cycles write requests spent in Infinity Fabric(tm) before a completion acknowledgement was returned to the L2. + - Cycles +* - [sL1D](sL1D) Cache Hit Rate + - The percent of sL1D requests that hit on a previously loaded line the cache. Calculated as the ratio of the number of sL1D requests that hit over the number of all sL1D requests. + - Percent +* - [sL1D](sL1D) Bandwidth + - The number of bytes looked up in the sL1D cache per unit time. 
This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator. + - GB/s +* - [L1I](L1I) Bandwidth + - The number of bytes looked up in the L1I cache per unit time. This is also presented as a percent of the peak theoretical bandwidth achievable on the specific accelerator. + - GB/s +* - [L1I](L1I) Cache Hit Rate + - The percent of L1I requests that hit on a previously loaded line the cache. Calculated as the ratio of the number of L1I requests that hit over the number of all L1I requests. + - Percent +* - [L1I](L1I) Fetch Latency + - The average number of cycles spent to fetch instructions to a [CU](cu). + - Cycles +``` + +## References + +- [AMD GPU HIP Training](https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf) +- [CDNA2 ISA Documentation](https://developer.amd.com/wp-content/resources/CDNA2_Shader_ISA_4February2022.pdf) +- [HSA Runtime Programmer’s Reference Manual](http://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf) +- [GS-4106 The AMD GCN Architecture - A Crash Course, by Layla Mah](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah) +- [AMD RADEON™ HD 7970 WITH GRAPHICS CORE NEXT (GCN) ARCHITECTURE, by Mike Mantor](https://old.hotchips.org/wp-content/uploads/hc_archives/hc24/HC24-3-ManyCore/HC24.28.315-AMD.GCN.mantor_v1.pdf) +- [AMD’s Radeon Next Generation GPU Architecture “Vega10”, by Mike Mantor](https://old.hotchips.org/wp-content/uploads/hc_archives/hc29/HC29.21-Monday-Pub/HC29.21.10-GPU-Gaming-Pub/HC29.21.120-Radeon-Vega10-Mantor-AMD-f1.pdf) +- [CDNA2 Whitepaper](https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf) +- [LLVM's User Guide for AMDGPU Backend](https://llvm.org/docs/AMDGPUUsage.html) + +## Disclaimer + +PCIe(r) is a registered trademark of PCI-SIG Corporation. 
+ + +# Definitions + +## Miscellaneous + +(TotalActiveCUCycles)= +(TotalCUCycles)= +(TotalSL1DCycles)= +(TotalL1ICycles)= +(TotalL2Cycles)= +(TotalActiveL2Cycles)= +(TotalPipeCycles)= +(TotalSECycles)= +(TotalSIMDCycles)= +(ThreadRequests)= +(Wavefront)= +(Workitem)= +(Workgroup)= +(Divergence)= +(KernelCycles)= +(KernelTime)= + +```{list-table} +:header-rows: 1 +:widths: 20 65 15 +:class: noscroll-table +* - Name + - Description + - Unit +* - Kernel Time + - The number of seconds the accelerator was executing a kernel, from the [Command Processor](CP)'s start-of-kernel timestamp (which is a number of cycles after the CP begins processing the packet) to the CP's end-of-kernel timestamp (which is a number of cycles before the CP stops processing the packet). + - Seconds +* - Kernel Cycles + - The number of cycles the accelerator was active doing _any_ work, as measured by the [Command Processor](CP). + - Cycles +* - Total CU Cycles + - The number of cycles the accelerator was active doing _any_ work (i.e., Kernel Cycles), multiplied by the number of [compute units](CU) on the accelerator. A measure of the total possible active cycles the compute units could be doing work, useful for normalization of metrics inside the CU. + - Cycles +* - Total Active CU Cycles + - The number of cycles a CU on the accelerator was active doing _any_ work, summed over all [compute units](CU) on the accelerator. + - Cycles +* - Total SIMD Cycles + - The number of cycles the accelerator was active doing _any_ work (i.e., Kernel Cycles), multiplied by the number of [SIMDs](CU) on the accelerator. A measure of the total possible active cycles the SIMDs could be doing work, useful for normalization of metrics inside the CU. + - Cycles +* - Total L2 Cycles + - The number of cycles the accelerator was active doing _any_ work (i.e., Kernel Cycles), multiplied by the number of [L2](L2) channels on the accelerator. 
A measure of the total possible active cycles the L2 channels could be doing work, useful for normalization of metrics inside the L2. + - Cycles +* - Total Active L2 Cycles + - The number of cycles a channel of the L2 cache was active doing _any_ work, summed over all [L2](L2) channels on the accelerator. + - Cycles +* - Total sL1D Cycles + - The number of cycles the accelerator was active doing _any_ work (i.e., Kernel Cycles), multiplied by the number of [scalar L1 Data caches](sL1D) on the accelerator. A measure of the total possible active cycles the sL1Ds could be doing work, useful for normalization of metrics inside the sL1D. + - Cycles +* - Total L1I Cycles + - The number of cycles the accelerator was active doing _any_ work (i.e., Kernel Cycles), multiplied by the number of [L1 Instruction caches](L1I) on the accelerator. A measure of the total possible active cycles the L1Is could be doing work, useful for normalization of metrics inside the L1I. + - Cycles +* - Total Scheduler-Pipe Cycles + - The number of cycles the accelerator was active doing _any_ work (i.e., Kernel Cycles), multiplied by the number of [scheduler pipes](CP) on the accelerator. A measure of the total possible active cycles the scheduler-pipes could be doing work, useful for normalization of metrics inside the [workgroup manager](SPI) and [command processor](CP). + - Cycles +* - Total Shader-Engine Cycles + - The total number of cycles the accelerator was active doing _any_ work, multiplied by the number of [Shader Engines](SE) on the accelerator. A measure of the total possible active cycles the Shader Engines could be doing work, useful for normalization of metrics inside the [workgroup manager](SPI). + - Cycles +* - Thread-requests + - The number of unique memory addresses accessed by a single memory instruction. On AMD's Instinct(tm) accelerators, this is a maximum of 64 (i.e., the size of the wavefront). 
+ - Addresses +* - Work-item + - A single 'thread' (lane) of execution, that executes in lockstep with the rest of the work-items comprising a [wavefront](Wavefront) of execution. + - N/A +* - Wavefront + - A group of work-items, or threads, that execute in lockstep on the [compute-unit](CU). On AMD's Instinct(tm) accelerators, the wavefront size is always 64 work-items. + - N/A +* - Workgroup + - A group of wavefronts that execute on the same [compute-unit](CU), and can cooperatively execute and share data via the use of synchronization primitives, [LDS](lds), atomics, etc. + - N/A +* - Divergence + - Divergence within a wavefront occurs when not all work-items are active when executing an instruction, e.g., due to non-uniform control flow within a wavefront. Can reduce overall execution efficiency by causing e.g., the [VALU](valu) to have to execute both branches of a conditional with different sets of work-items active. + - N/A +``` + +(normunit)= +## Normalization units + +A user-configurable unit by which the user can choose to normalize data. Choices include: + +```{list-table} +:header-rows: 1 +:widths: 20 80 +:class: noscroll-table +* - Name + - Description +* - `per_cycle` + - The total value of the measured counter/metric that occurred per kernel invocation divided by the [Kernel Cycles](KernelCycles), i.e., total number of cycles the kernel executed as measured by the [Command Processor](CP). +* - `per_wave` + - The total value of the measured counter/metric that occurred per kernel invocation divided by the total number of [wavefronts](wavefront) launched in the kernel. +* - `per_kernel` + - The total value of the measured counter/metric that occurred per kernel invocation. +* - `per_second` + - The total value of the measured counter/metric that occurred per kernel invocation divided by the [Kernel Time](KernelTime), i.e., the total runtime of the kernel in seconds, as measured by the [Command Processor](CP). 
+``` + +By default, Omniperf uses the `per_wave` normalization. The appropriate normalization will vary depending on your use case. +For instance, a `per_second` normalization may be useful for FLOP or bandwidth comparisons, while a `per_wave` normalization may be useful (e.g.,) to see how many (and what types) of instructions are used per wavefront, and a `per_kernel` normalization may be useful to get the total aggregate values of metrics for comparison between different configurations. + +(Mspace)= +## Memory Spaces + +AMD Instinct(tm) MI accelerators can access memory through multiple address spaces which may map to different physical memory locations on the system. +The [table below](mspace-table) provides a view of how various types of memory used in HIP map onto these constructs: + +```{list-table} Memory / Address space terminology +:header-rows: 1 +:name: mspace-table +:class: noscroll-table + +* - LLVM Address Space + - Hardware Memory Space + - HIP Terminology +* - Generic + - Flat + - N/A +* - Global + - Global + - Global +* - Local + - LDS + - LDS/Shared +* - Private + - Scratch + - Private +* - Constant + - Same as global + - Constant +``` + +Below is a high-level description of the address spaces in the AMDGPU backend of LLVM: + +```{list-table} +:header-rows: 1 +:widths: 20 80 +:class: noscroll-table + +* - Address space + - Description +* - Global + - Memory that can be seen by all threads in a process, and may be backed by the local accelerator's HBM, a remote accelerator's HBM, or the CPU's DRAM. +* - Local + - Memory that is only visible to a particular workgroup. On AMD's Instinct(tm) accelerator hardware, this is stored in [LDS](LDS) memory. +* - Private + - Memory that is only visible to a particular [work-item](workitem) (thread), stored in the scratch space on AMD's Instinct(tm) accelerators. +* - Constant + - Read-only memory that is in the global address space and stored on the local accelerator's HBM. 
+* - Generic + - Used when the compiler cannot statically prove that a pointer is addressing memory in a single (non-generic) address space. Mapped to Flat on AMD's Instinct(tm) accelerators, the pointer could dynamically address global, local, private or constant memory. +``` + +[LLVM's documentation for AMDGPU Backend](https://llvm.org/docs/AMDGPUUsage.html#address-spaces) will always have the most up-to-date information, and the interested reader is referred to this source for a more complete explanation. + +(Mtype)= +## Memory Type + +AMD Instinct(tm) accelerators contain a number of different memory allocation types to enable the HIP language's [memory coherency model](https://rocm.docs.amd.com/projects/HIP/en/latest/user_guide/programming_manual.html#coherency-controls). +These memory types are broadly similar between AMD Instinct(tm) accelerator generations, but may differ in exact implementation. + +In addition, these memory types _may_ differ between accelerators on the same system, even when accessing the same memory allocation. +For example, an [MI2XX](2xxnote) accelerator accessing "fine-grained" memory allocated local to that device may see the allocation as coherently cachable, while a remote accelerator might see the same allocation as uncached. + +These memory types include: + +```{list-table} +:header-rows: 1 +:widths: 20 80 +:class: noscroll-table + * - Memory type + - Description + * - Uncached Memory (UC) + - Memory that will not be cached in this accelerator. On [MI2XX](2xxnote) accelerators, this corresponds to "fine-grained" (a.k.a., "coherent") memory allocated on a remote accelerator or the host, e.g., using `hipHostMalloc` or `hipMallocManaged` with default allocation flags. + * - Non-hardware-Coherent Memory (NC) + - Memory that will be cached by the accelerator, and is only guaranteed to be consistent at kernel boundaries / after software-driven synchronization events. 
On [MI2XX](2xxnote) accelerators, this type of memory maps to (e.g.,) "coarse-grained" `hipHostMalloc`'d memory (i.e., allocated with the `hipHostMallocNonCoherent` flag), or `hipMalloc`'d memory allocated on a remote accelerator. + * - Coherently Cachable (CC) + - Memory for which only reads from the accelerator where the memory was allocated will be cached. Writes to CC memory are uncached, and trigger invalidations of any line within this accelerator. On [MI2XX](2xxnote) accelerators, this type of memory maps to "fine-grained" memory allocated on the local accelerator using, e.g., the `hipExtMallocWithFlags` API using the `hipDeviceMallocFinegrained` flag. + * - Read/Write Coherent Memory (RW) + - Memory that will be cached by the accelerator, but may be invalidated by writes from remote devices at kernel boundaries / after software-driven synchronization events. On [MI2XX](2xxnote) accelerators, this corresponds to "coarse-grained" memory allocated locally to the accelerator, using e.g., the default `hipMalloc` allocator. +``` + +A good discussion of coarse and fine grained memory allocations and what type of memory is returned by various combinations of memory allocators, flags and arguments can be found in the [Crusher Quick-Start Guide](https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#floating-point-fp-atomic-operations-and-coarse-fine-grained-memory-allocations). + +(profiling-with-omniperf)= +# Profiling with Omniperf by Example + +(VALU_inst_mix_example)= +## VALU Arithmetic Instruction Mix + +For this example, we consider the [instruction mix sample](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/instmix.hip) distributed as a part of Omniperf. 
+ +```{note} +This example is expected to work on all CDNA accelerators, however the results in this section were collected on an [MI2XX](2xxnote) accelerator +``` + +### Design note + +This code uses a number of inline assembly instructions to cleanly identify the types of instructions being issued, as well as to avoid optimization / dead-code elimination by the compiler. +While inline assembly is inherently unportable, this example is expected to work on all GCN GPUs and CDNA accelerators. + +We reproduce a sample of the kernel below: + +```c++ + // fp32: add, mul, transcendental and fma + float f1, f2; + asm volatile( + "v_add_f32_e32 %0, %1, %0\n" + "v_mul_f32_e32 %0, %1, %0\n" + "v_sqrt_f32 %0, %1\n" + "v_fma_f32 %0, %1, %0, %1\n" + : "=v"(f1) + : "v"(f2)); +``` + +These instructions correspond to: + - A 32-bit floating point addition, + - A 32-bit floating point multiplication, + - A 32-bit floating point square-root transcendental operation, and + - A 32-bit floating point fused multiply-add operation. + +For more detail, the reader is referred to (e.g.,) the [CDNA2 ISA Guide](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf). + +### Instruction mix + +This example was compiled and run on a MI250 accelerator using ROCm v5.6.0, and Omniperf v2.0.0. +```shell-session +$ hipcc -O3 instmix.hip -o instmix +``` + +We generate our profile for this example via: +```shell-session +$ omniperf profile -n instmix --no-roof -- ./instmix +``` + +and finally, analyze the instruction mix section: +```shell-session +$ omniperf analyze -p workloads/instmix/mi200/ -b 10.2 +<...> +10. 
Compute Units - Instruction Mix +10.2 VALU Arithmetic Instr Mix +╒═════════╤════════════╤═════════╤════════════════╕ +│ Index │ Metric │ Count │ Unit │ +╞═════════╪════════════╪═════════╪════════════════╡ +│ 10.2.0 │ INT32 │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.1 │ INT64 │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.2 │ F16-ADD │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.3 │ F16-MUL │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.4 │ F16-FMA │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.5 │ F16-Trans │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.6 │ F32-ADD │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.7 │ F32-MUL │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.8 │ F32-FMA │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.9 │ F32-Trans │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.10 │ F64-ADD │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.11 │ F64-MUL │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.12 │ F64-FMA │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.13 │ F64-Trans │ 1.00 │ Instr per wave │ +├─────────┼────────────┼─────────┼────────────────┤ +│ 10.2.14 │ Conversion │ 1.00 │ Instr per wave │ +╘═════════╧════════════╧═════════╧════════════════╛ +``` + +shows that we have exactly one of each type of VALU arithmetic instruction, by construction! 
+ +(Fabric_transactions_example)= +## Infinity-Fabric(tm) transactions + +For this example, we consider the [Infinity Fabric(tm) sample](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/fabric.hip) distributed as a part of Omniperf. +This code launches a simple read-only kernel, e.g.: + +```c++ +// the main streaming kernel +__global__ void kernel(int* x, size_t N, int zero) { + int sum = 0; + const size_t offset_start = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = 0; i < 10; ++i) { + for (size_t offset = offset_start; offset < N; offset += blockDim.x * gridDim.x) { + sum += x[offset]; + } + } + if (sum != 0) { + x[offset_start] = sum; + } +} +``` + +twice; once as a warmup, and once for analysis. +We note that the buffer `x` is initialized to all zeros via a call to `hipMemcpy` on the host before the kernel is ever launched, therefore the conditional: + +```c++ +if (sum != 0) { ... +``` + +is identically false (and thus: we expect no writes). + +```{note} +The actual sample included with Omniperf also includes the ability to select different operation types, e.g., atomics, writes, etc. +This abbreviated version is presented here for reference only. +``` + +Finally, this sample code lets the user control: + - The [granularity of an allocation](Mtype), + - The owner of an allocation (local HBM, CPU DRAM or remote HBM), and + - The size of an allocation (the default is $\sim4$GiB) + +via command line arguments. +In doing so, we can explore the impact of these parameters on the L2-Fabric metrics reported by Omniperf to further understand their meaning. + +All results in this section were generated on a node of Infinity Fabric(tm) connected MI250 accelerators using ROCm v5.6.0, and Omniperf v2.0.0. +Although results may vary with ROCm versions and accelerator connectivity, we expect the lessons learned here to be broadly applicable. 
+ +(Fabric_exp_1)= +### Experiment #1 - Coarse-grained, accelerator-local HBM reads + +In our first experiment, we consider the simplest possible case, a `hipMalloc`'d buffer that is local to our current accelerator: + +```shell-session +$ omniperf profile -n coarse_grained_local --no-roof -- ./fabric -t 1 -o 0 +Using: + mtype:CoarseGrained + mowner:Device + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/coarse_grained_local/mi200 -b 17.2.0 17.2.1 17.2.2 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 +<...> +17. L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ +│ 17.2.0 │ L2-Fabric Read BW │ 42947428672.00 │ 42947428672.00 │ 42947428672.00 │ Bytes per kernel │ +├─────────┼─────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.1 │ HBM Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼─────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.2 │ Remote Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧═════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls +╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.1 │ Read - 
Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.07 │ 0.07 │ 0.07 │ Pct │ +╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric Detailed Transaction Breakdown +╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ +│ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.1 │ Read (Uncached) │ 1450.00 │ 1450.00 │ 1450.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.2 │ Read (64B) │ 671053573.00 │ 671053573.00 │ 671053573.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.3 │ HBM Read │ 671053565.00 │ 671053565.00 │ 671053565.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.4 │ Remote Read │ 8.00 │ 8.00 │ 8.00 │ Req per kernel │ +╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ +``` + +Here, we see: + - The vast majority of L2-Fabric requests (>99%) are 64B read requests (17.5.2) + - Nearly 100% of the read requests (17.2.1) are homed in on the accelerator-local HBM (17.5.3), while some small fraction of these reads are routed to a "remote" device (17.5.4) + - These drive a $\sim40$GiB per kernel read-bandwidth (17.2.0) + +In addition, we see a small amount of [uncached](Mtype) reads (17.5.1), these correspond to things like: + - the assembly code 
to execute the kernel + - kernel arguments + - coordinate parameters (e.g., blockDim.z) that were not initialized by the hardware, etc. +and may account for some of our 'remote' read requests (17.5.4), e.g., reading from CPU DRAM. + +The above list is not exhaustive, nor are all of these guaranteed to be 'uncached' -- the exact implementation depends on the accelerator and ROCm versions used. +These read requests could be interrogated further in the [Scalar L1 Data Cache](sL1D) and [Instruction Cache](L1I) metric sections. + +```{note} +The Traffic metrics in Sec 17.2 are presented as a percentage of the total number of requests, e.g. 'HBM Read Traffic' is the percent of read requests (17.5.0-17.5.2) that were directed to the accelerators' local HBM (17.5.3). +``` + +(Fabric_exp_2)= +### Experiment #2 - Fine-grained, accelerator-local HBM reads + +In this experiment, we change the [granularity](Mtype) of our device-allocation to be fine-grained device memory, local to the current accelerator. +Our code uses the `hipExtMallocWithFlags` API with the `hipDeviceMallocFinegrained` flag to accomplish this. + +```{note} +On some systems (e.g., those with only PCIe(r) connected accelerators), you need to set the environment variable `HSA_FORCE_FINE_GRAIN_PCIE=1` to enable this memory type. +``` + +```shell-session +$ omniperf profile -n fine_grained_local --no-roof -- ./fabric -t 0 -o 0 +Using: + mtype:FineGrained + mowner:Device + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/fine_grained_local/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 +<...> +17. 
L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ +│ 17.2.0 │ L2-Fabric Read BW │ 42948661824.00 │ 42948661824.00 │ 42948661824.00 │ Bytes per kernel │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.1 │ HBM Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.2 │ Remote Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.3 │ Uncached Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls +╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.07 │ 0.07 │ 0.07 │ Pct │ +╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric Detailed 
Transaction Breakdown +╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ +│ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.1 │ Read (Uncached) │ 1334.00 │ 1334.00 │ 1334.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.2 │ Read (64B) │ 671072841.00 │ 671072841.00 │ 671072841.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.3 │ HBM Read │ 671072835.00 │ 671072835.00 │ 671072835.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.4 │ Remote Read │ 6.00 │ 6.00 │ 6.00 │ Req per kernel │ +╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ +``` + +Comparing with our [previous example](Fabric_exp_1), we see a relatively similar result, namely: + - The vast majority of L2-Fabric requests are 64B read requests (17.5.2) + - Nearly all these read requests are directed to the accelerator-local HBM (17.2.1) + +In addition, we now see a small percentage of HBM Read Stalls (17.4.2), as streaming fine-grained memory is putting more stress on Infinity Fabric(tm). + +```{note} +The stalls in Sec 17.4 are presented as a percentage of the total number of active L2 cycles, summed over [all L2 channels](L2). +``` + +(Fabric_exp_3)= +### Experiment #3 - Fine-grained, remote-accelerator HBM reads + +In this experiment, we move our [fine-grained](Mtype) allocation to be owned by a remote accelerator. 
+We accomplish this by first changing the HIP device using e.g., `hipSetDevice(1)` API, then allocating fine-grained memory (as described [previously](Fabric_exp_2)), and finally resetting the device back to the default, e.g., `hipSetDevice(0)`. + +Although we have not changed our code significantly, we do see a substantial change in the L2-Fabric metrics: + +```shell-session +$ omniperf profile -n fine_grained_remote --no-roof -- ./fabric -t 0 -o 2 +Using: + mtype:FineGrained + mowner:Remote + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/fine_grained_remote/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 +<...> +17. L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ +│ 17.2.0 │ L2-Fabric Read BW │ 42949692736.00 │ 42949692736.00 │ 42949692736.00 │ Bytes per kernel │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.1 │ HBM Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.2 │ Remote Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.3 │ Uncached Read Traffic │ 200.00 │ 200.00 │ 200.00 │ Pct │ +╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls +╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ 
+╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 17.85 │ 17.85 │ 17.85 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric Detailed Transaction Breakdown +╒═════════╤═════════════════╤═══════════════╤═══════════════╤═══════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╡ +│ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.1 │ Read (Uncached) │ 1342177894.00 │ 1342177894.00 │ 1342177894.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.2 │ Read (64B) │ 671088949.00 │ 671088949.00 │ 671088949.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.3 │ HBM Read │ 307.00 │ 307.00 │ 307.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.4 │ Remote Read │ 671088642.00 │ 671088642.00 │ 671088642.00 │ Req per kernel │ +╘═════════╧═════════════════╧═══════════════╧═══════════════╧═══════════════╧════════════════╛ +``` + +First, we see that while we still observe approximately the same number of 64B Read 
Requests (17.5.2), we now see an even larger number of Uncached Read Requests (17.5.1). Some simple division reveals: +```math +1342177894.00 / 671088949.00 ≈ 2 +``` +That is, each 64B Read Request is _also_ counted as two Uncached Read Requests, as reflected in the [request-flow diagram](fabric-fig). +This is also why the Uncached Read Traffic metric (17.2.3) is at the counter-intuitive value of 200%! + +In addition, we also observe that: + - we no longer see any significant number of HBM Read Requests (17.2.1, 17.5.3), nor HBM Read Stalls (17.4.2), but instead + - we observe that almost all of these requests are considered "remote" (17.2.2, 17.5.4) and are being routed to another accelerator, or the CPU --- in this case HIP Device 1 --- and + - we observe a significantly larger percentage of AMD Infinity Fabric(tm) Read Stalls (17.4.1) as compared to the HBM Read Stalls in the [previous example](Fabric_exp_2) + +These stalls correspond to reads that are going out over the AMD Infinity Fabric(tm) connection to another MI250 accelerator. +In addition, because these are crossing between accelerators, we expect significantly lower achievable bandwidths as compared to the local accelerator's HBM -- this is reflected (indirectly) in the magnitude of the stall metric (17.4.1). +Finally, we note that if our system contained only PCIe(r) connected accelerators, these observations will differ. + +(Fabric_exp_4)= +### Experiment #4 - Fine-grained, CPU-DRAM reads + +In this experiment, we move our [fine-grained](Mtype) allocation to be owned by the CPU's DRAM. 
+We accomplish this by allocating host-pinned fine-grained memory using the `hipHostMalloc` API: + +```shell-session +$ omniperf profile -n fine_grained_host --no-roof -- ./fabric -t 0 -o 1 +Using: + mtype:FineGrained + mowner:Host + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/fine_grained_host/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 +<...> +17. L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ +│ 17.2.0 │ L2-Fabric Read BW │ 42949691264.00 │ 42949691264.00 │ 42949691264.00 │ Bytes per kernel │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.1 │ HBM Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.2 │ Remote Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.3 │ Uncached Read Traffic │ 200.00 │ 200.00 │ 200.00 │ Pct │ +╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls +╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 91.29 │ 91.29 │ 91.29 │ Pct │ 
+├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric Detailed Transaction Breakdown +╒═════════╤═════════════════╤═══════════════╤═══════════════╤═══════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╡ +│ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.1 │ Read (Uncached) │ 1342177848.00 │ 1342177848.00 │ 1342177848.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.2 │ Read (64B) │ 671088926.00 │ 671088926.00 │ 671088926.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.3 │ HBM Read │ 284.00 │ 284.00 │ 284.00 │ Req per kernel │ +├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ +│ 17.5.4 │ Remote Read │ 671088642.00 │ 671088642.00 │ 671088642.00 │ Req per kernel │ +╘═════════╧═════════════════╧═══════════════╧═══════════════╧═══════════════╧════════════════╛ +``` + +Here we see _almost_ the same results as in the [previous experiment](Fabric_exp_3); however, now that we are crossing a PCIe(r) bus to the CPU, we see that the Infinity Fabric(tm) Read stalls (17.4.1) have shifted to be a PCIe(r) stall (17.4.0). 
+In addition, as (on this system) the PCIe(r) bus has a lower peak bandwidth than the AMD Infinity Fabric(TM) connection between two accelerators, we once again observe an increase in the percentage of stalls on this interface. + +```{note} +Had we performed this same experiment on a [MI250X system](https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf), these transactions would again have been marked as Infinity Fabric(tm) Read stalls (17.4.1), as the CPU is connected to the accelerator via AMD Infinity Fabric. +``` + +(Fabric_exp_5)= +### Experiment #5 - Coarse-grained, CPU-DRAM reads + +In our next fabric experiment, we change our CPU memory allocation to be [coarse-grained](Mtype). +We accomplish this by passing the `hipHostMalloc` API the `hipHostMallocNonCoherent` flag, to mark the allocation as coarse-grained: + +```shell-session +$ omniperf profile -n coarse_grained_host --no-roof -- ./fabric -t 1 -o 1 +Using: + mtype:CoarseGrained + mowner:Host + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/coarse_grained_host/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 +<...> +17. 
L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ +│ 17.2.0 │ L2-Fabric Read BW │ 42949691264.00 │ 42949691264.00 │ 42949691264.00 │ Bytes per kernel │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.1 │ HBM Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.2 │ Remote Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.3 │ Uncached Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls +╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 91.27 │ 91.27 │ 91.27 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric 
Detailed Transaction Breakdown +╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ +│ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.1 │ Read (Uncached) │ 562.00 │ 562.00 │ 562.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.2 │ Read (64B) │ 671088926.00 │ 671088926.00 │ 671088926.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.3 │ HBM Read │ 281.00 │ 281.00 │ 281.00 │ Req per kernel │ +├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.4 │ Remote Read │ 671088645.00 │ 671088645.00 │ 671088645.00 │ Req per kernel │ +╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ +``` + +Here we see a similar result to our [previous experiment](Fabric_exp_4), with one key difference: our accesses are no longer marked as Uncached Read requests (17.2.3, 17.5.1), but instead are 64B read requests (17.5.2), as observed in our [Coarse-grained, accelerator-local HBM](Fabric_exp_1) experiment. + +(Fabric_exp_6)= +### Experiment #6 - Fine-grained, CPU-DRAM writes + +Thus far in our exploration of the L2-Fabric interface, we have primarily focused on read operations. +However, in [our request flow diagram](fabric-fig), we note that writes are counted separately. 
+To observe this, we use the '-p' flag to trigger write operations to fine-grained memory allocated on the host: + +```shell-session +$ omniperf profile -n fine_grained_host_write --no-roof -- ./fabric -t 0 -o 1 -p 1 +Using: + mtype:FineGrained + mowner:Host + mspace:Global + mop:Write + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/fine_grained_host_write/mi200 -b 17.2.4 17.2.5 17.2.6 17.2.7 17.2.8 17.4.3 17.4.4 17.4.5 17.4.6 17.5.5 17.5.6 17.5.7 17.5.8 17.5.9 17.5.10 -n per_kernel --dispatch 2 +<...> +17. L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═══════════════════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ +│ 17.2.4 │ L2-Fabric Write and Atomic BW │ 42949672960.00 │ 42949672960.00 │ 42949672960.00 │ Bytes per kernel │ +├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.5 │ HBM Write and Atomic Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.6 │ Remote Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.7 │ Atomic Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ +│ 17.2.8 │ Uncached Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +╘═════════╧═══════════════════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls 
+╒═════════╤════════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.3 │ Write - PCIe Stall │ PCIe Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.4 │ Write - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.5 │ Write - HBM Stall │ HBM Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.6 │ Write - Credit Starvation │ Credit Starvation │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧════════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric Detailed Transaction Breakdown +╒═════════╤═════════════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ +│ 17.5.5 │ Write (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.6 │ Write (Uncached) │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │ +├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.7 │ Write (64B) │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │ +├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.8 
│ HBM Write and Atomic │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.9 │ Remote Write and Atomic │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │ +├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ +│ 17.5.10 │ Atomic │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +╘═════════╧═════════════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ +``` + +Here we notice a few changes in our request pattern: + - As expected, the requests have changed from 64B Reads to 64B Write requests (17.5.7), + - these requests are homed in on a "remote" destination (17.2.6, 17.5.9), as expected, and, + - these are also counted as a single Uncached Write request (17.5.6). + +In addition, there are rather significant changes in the bandwidth values reported: + - the "L2-Fabric Write and Atomic" bandwidth metric (17.2.4) reports about 40GiB of data written across Infinity Fabric(tm) while, + - the "Remote Write and Atomic Traffic" metric (17.2.6) indicates that nearly 100% of these requests are being directed to a remote destination + +The precise meaning of these metrics will be explored in the [subsequent experiment](Fabric_exp_7). + +Finally, we note that we see no write stalls on the PCIe(r) bus (17.4.3). This is because writes over a PCIe(r) bus [are posted](https://members.pcisig.com/wg/PCI-SIG/document/10912), i.e., they do not require acknowledgement. + +(Fabric_exp_7)= +### Experiment #7 - Fine-grained, CPU-DRAM atomicAdd + +Next, we change our experiment to instead target `atomicAdd` operations to the CPU's DRAM. 
+ +```shell-session +$ omniperf profile -n fine_grained_host_add --no-roof -- ./fabric -t 0 -o 1 -p 2 +Using: + mtype:FineGrained + mowner:Host + mspace:Global + mop:Add + mdata:Unsigned + remoteId:-1 +<...> +$ omniperf analyze -p workloads/fine_grained_host_add/mi200 -b 17.2.4 17.2.5 17.2.6 17.2.7 17.2.8 17.4.3 17.4.4 17.4.5 17.4.6 17.5.5 17.5.6 17.5.7 17.5.8 17.5.9 17.5.10 -n per_kernel --dispatch 2 +<...> +17. L2 Cache +17.2 L2 - Fabric Transactions +╒═════════╤═══════════════════════════════════╤══════════════╤══════════════╤══════════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════════════════╪══════════════╪══════════════╪══════════════╪══════════════════╡ +│ 17.2.4 │ L2-Fabric Write and Atomic BW │ 429496736.00 │ 429496736.00 │ 429496736.00 │ Bytes per kernel │ +├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ +│ 17.2.5 │ HBM Write and Atomic Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ +│ 17.2.6 │ Remote Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ +│ 17.2.7 │ Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ +│ 17.2.8 │ Uncached Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ +╘═════════╧═══════════════════════════════════╧══════════════╧══════════════╧══════════════╧══════════════════╛ +17.4 L2 - Fabric Interface Stalls +╒═════════╤════════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ 
+╞═════════╪════════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ +│ 17.4.3 │ Write - PCIe Stall │ PCIe Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.4 │ Write - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.5 │ Write - HBM Stall │ HBM Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ +│ 17.4.6 │ Write - Credit Starvation │ Credit Starvation │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧════════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ +17.5 L2 - Fabric Detailed Transaction Breakdown +╒═════════╤═════════════════════════╤═════════════╤═════════════╤═════════════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════════╪═════════════╪═════════════╪═════════════╪════════════════╡ +│ 17.5.5 │ Write (32B) │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ +├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ +│ 17.5.6 │ Write (Uncached) │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ +├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ +│ 17.5.7 │ Write (64B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ +│ 17.5.8 │ HBM Write and Atomic │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ +├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ +│ 17.5.9 │ Remote Write and Atomic │ 
13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ +├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ +│ 17.5.10 │ Atomic │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ +╘═════════╧═════════════════════════╧═════════════╧═════════════╧═════════════╧════════════════╛ +``` + +In this case, there is quite a lot to unpack: + - For the first time, the 32B Write requests (17.5.5) are heavily used. + - These correspond to Atomic requests (17.2.7, 17.5.10), and are counted as Uncached Writes (17.5.6). + - The L2-Fabric Write and Atomic bandwidth metric (17.2.4) shows about 0.4 GiB of traffic. For convenience, the sample reduces the default problem size for this case due to the speed of atomics across a PCIe(r) bus, and finally, + - The traffic is directed to a remote device (17.2.6, 17.5.9). + +Let us consider what an "atomic" request means in this context. +Recall that we are discussing memory traffic flowing from the L2 cache, the device-wide coherence point on current CDNA accelerators such as the MI250, to e.g., the CPU's DRAM. +In this light, we see that these requests correspond to _system scope_ atomics, and specifically in the case of the MI250, to fine-grained memory! + + + +## Vector memory operation counting + +(flatmembench)= +### Global / Generic (FLAT) + +For this example, we consider the [vector-memory sample](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/vmem.hip) distributed as a part of Omniperf. +This code launches many different versions of simple read/write/atomic-only kernels targeting various address spaces, e.g. below is our simple `global_write` kernel: + +```c++ +// write to a global pointer +__global__ void global_write(int* ptr, int zero) { + ptr[threadIdx.x] = zero; +} +``` + +This example was compiled and run on an MI250 accelerator using ROCm v5.6.0, and Omniperf v2.0.0. 
+```shell-session +$ hipcc -O3 --save-temps vmem.hip -o vmem +``` +We have also chosen to include the `--save-temps` flag to save the compiler temporary files, such as the generated CDNA assembly code, for inspection. + +Finally, we generate our omniperf profile as: +```shell-session +$ omniperf profile -n vmem --no-roof -- ./vmem +``` + +(Flat_design)= +#### Design note + +We should explain some of the more peculiar line(s) of code in our example, e.g., the use of compiler builtins and explicit address space casting, etc. +```c++ +// write to a generic pointer +typedef int __attribute__((address_space(0)))* generic_ptr; + +__attribute__((noinline)) __device__ void generic_store(generic_ptr ptr, int zero) { *ptr = zero; } + +__global__ void generic_write(int* ptr, int zero, int filter) { + __shared__ int lds[1024]; + int* generic = (threadIdx.x < filter) ? &ptr[threadIdx.x] : &lds[threadIdx.x]; + generic_store((generic_ptr)generic, zero); +} +``` + +One of our aims in this example is to demonstrate the use of the ['generic' (a.k.a., FLAT)](https://llvm.org/docs/AMDGPUUsage.html#address-space-identifier) address space. +This address space is typically used when the compiler cannot statically prove where the backing memory is located. + +To try to _force_ the compiler to use this address space, we have applied `__attribute__((noinline))` to the `generic_store` function to have the compiler treat it as a function call (i.e., on the other-side of which, the address space may not be known). +However, in a trivial example such as this, the compiler may choose to specialize the `generic_store` function to the two address spaces that may provably be used from our translation-unit, i.e., ['local' (a.k.a., LDS)](Mspace) and ['global'](Mspace). Hence, we forcibly cast the address space to ['generic' (i.e., FLAT)](Mspace) to avoid this compiler optimization. 
+ +```{warning} +While convenient for our example here, this sort of explicit address space casting can lead to strange compilation errors, and in the worst cases, incorrect results and thus use is discouraged in production code. +``` + +For more details on address spaces, the reader is referred to the [address-space section](Mspace). + +#### Global Write + +First, we demonstrate our simple `global_write` kernel: +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 1 -b 10.3 15.1.4 15.1.5 15.1.6 15.1.7 15.1.8 15.1.9 15.1.10 15.1.11 -n per_kernel +<...> +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤═════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ global_write(int*, int) [clone .kd] │ 1.00 │ 2400.00 │ 2400.00 │ 2400.00 │ 100.00 │ +╘════╧═════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +10. 
Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +15. 
Address Processing Unit and Data Return Path (TA/TD) +15.1 Address Processing Unit +╒═════════╤═════════════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 15.1.4 │ Total Instructions │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.5 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.6 │ Global/Generic Read Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.7 │ Global/Generic Write Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.8 │ Global/Generic Atomic Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.9 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.10 │ Spill/Stack Read Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 15.1.11 │ Spill/Stack Write Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +╘═════════╧═════════════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ +``` + +Here, we have presented both the information in the VMEM Instruction Mix table (10.3) and the Address Processing Unit (15.1). +We note that this data is expected to be identical, and hence we omit table 15.1 in our subsequent examples. + +In addition, as expected, we see a single Global/Generic write instruction (10.3.2, 15.1.7). 
+Inspecting the generated assembly: + +```asm + .protected _Z12global_writePii ; -- Begin function _Z12global_writePii + .globl _Z12global_writePii + .p2align 8 + .type _Z12global_writePii,@function +_Z12global_writePii: ; @_Z12global_writePii +; %bb.0: + s_load_dword s2, s[4:5], 0x8 + s_load_dwordx2 s[0:1], s[4:5], 0x0 + v_lshlrev_b32_e32 v0, 2, v0 + s_waitcnt lgkmcnt(0) + v_mov_b32_e32 v1, s2 + global_store_dword v0, v1, s[0:1] + s_endpgm + .section .rodata,#alloc + .p2align 6, 0x0 + .amdhsa_kernel _Z12global_writePii +``` + +we see that this corresponds to an instance of a `global_store_dword` operation. + +```{note} +The assembly in these experiments was generated for an [MI2XX](2xxnote) accelerator using ROCm 5.6.0, and may change depending on ROCm versions and the targeted hardware architecture. +``` + +(Generic_write)= +#### Generic Write to LDS + +Next, we examine a generic write. +As discussed [previously](Flat_design), our `generic_write` kernel uses an address space cast to _force_ the compiler to choose our desired address space, regardless of other optimizations that may be possible. + +We also note that the `filter` parameter passed in as a kernel argument (see [example](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/vmem.hip), or [design note](Flat_design)) is set to zero on the host, such that we always write to the 'local' (LDS) memory allocation `lds`. + +Examining this kernel in the VMEM Instruction Mix table yields: + +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 2 -b 10.3 -n per_kernel +<...> +0. 
Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ generic_write(int*, int, int) [clone .kd │ 1.00 │ 2880.00 │ 2880.00 │ 2880.00 │ 100.00 │ +│ │ ] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +10. Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ 
Instr per kernel │ +╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ +``` + +As expected, we see a single generic write (10.3.2). +In the assembly generated for this kernel (in particular, we care about the `generic_store` function), we see that this corresponds to a `flat_store_dword` instruction: + +```asm + .type _Z13generic_storePii,@function +_Z13generic_storePii: ; @_Z13generic_storePii +; %bb.0: + s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + flat_store_dword v[0:1], v2 + s_waitcnt vmcnt(0) lgkmcnt(0) + s_setpc_b64 s[30:31] +.Lfunc_end0: +``` + +In addition, we note that we can observe the destination of this request by looking at the LDS Instructions metric (12.2.0): +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 2 -b 12.2.0 -n per_kernel +<...> +12. Local Data Share (LDS) +12.2 LDS Stats +╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 12.2.0 │ LDS Instrs │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛ +``` +which indicates one LDS access. + +```{note} +Exercise for the reader: if this access had been targeted at global memory (e.g., by changing value of `filter`), where should we look for the memory traffic? Hint: see our [generic read](Generic_read) example. +``` + +#### Global read + +Next, we examine a simple global read operation: + +```c++ +__global__ void global_read(int* ptr, int zero) { + int x = ptr[threadIdx.x]; + if (x != zero) { + ptr[threadIdx.x] = x + 1; + } +} +``` + +Here we observe a now familiar pattern: + - Read a value in from global memory + - Have a write hidden behind a conditional that is impossible for the compiler to statically eliminate, but is identically false. In this case, our `main()` function initializes the data in `ptr` to zero. 
+ +Running Omniperf on this kernel yields: + +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 3 -b 10.3 -n per_kernel +<...> +0. Top Stat +╒════╤════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ global_read(int*, int) [clone .kd] │ 1.00 │ 4480.00 │ 4480.00 │ 4480.00 │ 100.00 │ +╘════╧════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +10. Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ 
+├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ +``` + +Here we see a single global/generic instruction (10.3.0) which, as expected, is a read (10.3.1). + +(Generic_read)= +#### Generic read from global memory + +For our generic read example, we choose to change our target for the generic read to be global memory: +```c++ +__global__ void generic_read(int* ptr, int zero, int filter) { + __shared__ int lds[1024]; + if (static_cast<int>(filter - 1) == zero) { + lds[threadIdx.x] = 0; // initialize to zero to avoid conditional, but hide behind _another_ conditional + } + int* generic; + if (static_cast<int>(threadIdx.x) > filter - 1) { + generic = &ptr[threadIdx.x]; + } else { + generic = &lds[threadIdx.x]; + abort(); + } + int x = generic_load((generic_ptr)generic); + if (x != zero) { + ptr[threadIdx.x] = x + 1; + } +} +``` + +In addition to our usual `if (condition_that_wont_happen)` guard around the write operation, there is an additional conditional around the initialization of the `lds` buffer. +We note that it's typically required to write to this buffer to prevent the compiler from eliminating the local memory branch entirely due to undefined behavior (use of an uninitialized value). +However, to report _only_ our global memory read, we again hide this initialization behind an identically false conditional (both `zero` and `filter` are set to zero in the kernel launch). Note that this is a _different_ conditional from our pointer assignment (to avoid combination of the two). + +Running Omniperf on this kernel reports: +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 4 -b 10.3 12.2.0 16.3.10 -n per_kernel +<...> +0.
Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ generic_read(int*, int, int) [clone .kd] │ 1.00 │ 2240.00 │ 2240.00 │ 2240.00 │ 100.00 │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +10. Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ 
+╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +12. Local Data Share (LDS) +12.2 LDS Stats +╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 12.2.0 │ LDS Instrs │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +16. Vector L1 Data Cache +16.3 L1D Cache Accesses +╒═════════╤════════════╤═══════╤═══════╤═══════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════╪═══════╪═══════╪═══════╪════════════════╡ +│ 16.3.10 │ L1-L2 Read │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ +╘═════════╧════════════╧═══════╧═══════╧═══════╧════════════════╛ +``` + +Here we observe: + - A single global/generic read operation (10.3.1), which + - Is not an LDS instruction (12.2), as seen in our [generic write](Generic_write) example, but is instead + - An L1-L2 read operation (16.3.10) + +That is, we have successfully targeted our generic read at global memory. +Inspecting the assembly shows this corresponds to a `flat_load_dword` instruction. + +(Global_atomic)= +#### Global atomic + +Our global atomic kernel: +```c++ +__global__ void global_atomic(int* ptr, int zero) { + atomicAdd(ptr, zero); +} +``` +simply atomically adds a (non-compile-time) zero value to a pointer. + +Running Omniperf on this kernel yields: +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 5 -b 10.3 16.3.12 -n per_kernel +<...> +0. 
Top Stat +╒════╤══════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ global_atomic(int*, int) [clone .kd] │ 1.00 │ 4640.00 │ 4640.00 │ 4640.00 │ 100.00 │ +╘════╧══════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +10. Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ 
+╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +16. Vector L1 Data Cache +16.3 L1D Cache Accesses +╒═════════╤══════════════╤═══════╤═══════╤═══════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪══════════════╪═══════╪═══════╪═══════╪════════════════╡ +│ 16.3.12 │ L1-L2 Atomic │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ +╘═════════╧══════════════╧═══════╧═══════╧═══════╧════════════════╛ +``` + +Here we see a single global/generic atomic instruction (10.3.3), which corresponds to an L1-L2 atomic request (16.3.12). + +(Generic_atomic)= +#### Generic, mixed atomic + +In our final global/generic example, we look at a case where our generic operation targets both LDS and global memory: +```c++ +__global__ void generic_atomic(int* ptr, int filter, int zero) { + __shared__ int lds[1024]; + int* generic = (threadIdx.x % 2 == filter) ? &ptr[threadIdx.x] : &lds[threadIdx.x]; + generic_atomic((generic_ptr)generic, zero); +} +``` + +This assigns every other work-item to atomically update global memory or local memory. + +Running this kernel through Omniperf shows: +```shell-session +$ omniperf analyze -p workloads/vmem/mi200/ --dispatch 6 -b 10.3 12.2.0 16.3.12 -n per_kernel +<...> +0. Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ +│ 0 │ generic_atomic(int*, int, int) [clone .k │ 1.00 │ 3360.00 │ 3360.00 │ 3360.00 │ 100.00 │ +│ │ d] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + +10. 
Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +12. 
Local Data Share (LDS) +12.2 LDS Stats +╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 12.2.0 │ LDS Instrs │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +16. Vector L1 Data Cache +16.3 L1D Cache Accesses +╒═════════╤══════════════╤═══════╤═══════╤═══════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪══════════════╪═══════╪═══════╪═══════╪════════════════╡ +│ 16.3.12 │ L1-L2 Atomic │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ +╘═════════╧══════════════╧═══════╧═══════╧═══════╧════════════════╛ +``` + +That is, we see: + - A single generic atomic instruction (10.3.3) that maps to both + - an LDS instruction (12.2.0), and + - an L1-L2 atomic request (16.3.12) + +We have demonstrated the ability of the generic address space to _dynamically_ target different backing memory! + +(buffermembench)= +### Spill/Scratch (BUFFER) + +Next we examine the use of 'Spill/Scratch' memory. +On current CDNA accelerators such as the [MI2XX](2xxnote), this is implemented using the [private](mspace) memory space, which maps to ['scratch' memory](https://llvm.org/docs/AMDGPUUsage.html#amdgpu-address-spaces) in AMDGPU hardware terminology. +This type of memory can be accessed via different instructions depending on the specific architecture targeted. However, current CDNA accelerators such as the [MI2XX](2xxnote) use so-called `buffer` instructions to access private memory in a simple (and typically coalesced) manner. See [Sec. 9.1, 'Vector Memory Buffer Instructions' of the CDNA2 ISA guide](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf) for further reading on this instruction type.
+ +We develop a [simple kernel](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/stack.hip) that uses stack memory: +```c++ +#include <hip/hip_runtime.h> +__global__ void knl(int* out, int filter) { + int x[1024]; + x[filter] = 0; + if (threadIdx.x < filter) + out[threadIdx.x] = x[threadIdx.x]; +} +``` + +Our strategy here is to: + - Create a large stack buffer (that cannot reasonably fit into registers) + - Write to a compile-time unknown location on the stack, and then + - Behind the typical compile-time unknown `if(condition_that_wont_happen)` + - Read from a different, compile-time unknown, location on the stack and write to global memory to prevent the compiler from optimizing it out. + +This example was compiled and run on an MI250 accelerator using ROCm v5.6.0, and Omniperf v2.0.0. +```shell-session +$ hipcc -O3 stack.hip -o stack +``` +and profiled using Omniperf: +```shell-session +$ omniperf profile -n stack --no-roof -- ./stack +<...> +$ omniperf analyze -p workloads/stack/mi200/ -b 10.3 16.3.11 -n per_kernel +<...> +10.
Compute Units - Instruction Mix +10.3 VMEM Instr Mix +╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ +│ 10.3.0 │ Global/Generic Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.4 │ Spill/Stack Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.6 │ Spill/Stack Write │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ +├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ +│ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ +╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + +-------------------------------------------------------------------------------- +16. 
Vector L1 Data Cache +16.3 L1D Cache Accesses +╒═════════╤═════════════╤═══════╤═══════╤═══════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════╪═══════╪═══════╪═══════╪════════════════╡ +│ 16.3.11 │ L1-L2 Write │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ +╘═════════╧═════════════╧═══════╧═══════╧═══════╧════════════════╛ +``` + +Here we see a single write to the stack (10.3.6), which corresponds to an L1-L2 write request (16.3.11), i.e., the stack is backed by global memory and travels through the same memory hierarchy. + +(IPC_example)= +## Instructions-per-cycle and Utilizations example + +For this section, we use the instructions-per-cycle (IPC) [example](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/ipc.hip) included with Omniperf. + +This example is compiled using `c++17` support: + +```shell-session +$ hipcc -O3 ipc.hip -o ipc -std=c++17 +``` + +and was run on an MI250 CDNA2 accelerator: + +```shell-session +$ omniperf profile -n ipc --no-roof -- ./ipc +``` + +The results shown in this section are _generally_ applicable to CDNA accelerators, but may vary between generations and specific products. + +### Design note + +The kernels in this example all execute a specific assembly operation `N` times (1000, by default), for instance the `vmov` kernel: + +```c++ +template +__device__ void vmov_op() { + int dummy; + if constexpr (N >= 1) { + asm volatile("v_mov_b32 v0, v1\n" : : "{v31}"(dummy)); + vmov_op(); + } +} + +template +__global__ void vmov() { + vmov_op(); +} +``` + +The kernels are then launched twice, once for a warm-up run, and once for measurement. + +(VALU_ipc)= +### VALU Utilization and IPC + +Now we can use our test to measure the achieved instructions-per-cycle of various types of instructions. 
+We start with a simple [VALU](valu) operation, i.e., a `v_mov_b32` instruction, e.g.: + +```asm +v_mov_b32 v0, v1 +``` + +This instruction simply copies the contents from the source register (`v1`) to the destination register (`v0`). +Investigating this kernel with Omniperf, we see: + +```shell-session +$ omniperf analyze -p workloads/ipc/mi200/ --dispatch 7 -b 11.2 +<...> +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ +│ 0 │ void vmov<1000>() [clone .kd] │ 1.00 │ 99317423.00 │ 99317423.00 │ 99317423.00 │ 100.00 │ +╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +11. 
Compute Units - Compute Pipeline +11.2 Pipeline Stats +╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ +│ 11.2.0 │ IPC │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.2 │ SALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.3 │ VALU Util │ 99.98 │ 99.98 │ 99.98 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.5 │ Branch Util │ 0.1 │ 0.1 │ 0.1 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.6 │ VALU Active Threads │ 64.0 │ 64.0 │ 64.0 │ Threads │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ +╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ +``` + +Here we see that: + + 1. Both the IPC (11.2.0) and "Issued" IPC (11.2.1) metrics are $\sim 1$ + 2. The VALU Utilization metric (11.2.3) is also $\sim100\%$, and finally + 3. The VALU Active Threads metric (11.2.6) is 64, i.e., the wavefront size on CDNA accelerators, as all threads in the wavefront are active. + +We will explore the difference between the IPC (11.2.0) and "Issued" IPC (11.2.1) metrics in the [next section](Issued_ipc). + +Additionally, we notice a small (0.1%) Branch utilization (11.2.5).
+Inspecting the assembly of this kernel shows there are no branch operations, however recalling the note in the [Pipeline statistics](Pipeline_stats) section: + +> the Branch utilization <...> includes time spent in other instruction types (namely: `s_endpgm`) that are _typically_ a very small percentage of the overall kernel execution. + +we see that this is coming from execution of the `s_endpgm` instruction at the end of every wavefront. + +```{note} +Technically, the cycle counts used in the denominators of our IPC metrics are actually in units of quad-cycles, a group of 4 consecutive cycles. +However, a typical [VALU](valu) instruction on CDNA accelerators runs for a single quad-cycle (see [Layla Mah's GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah), slide 30). +Therefore, for simplicity, we simply report these metrics as "instructions per cycle". +``` + +(Issued_ipc)= +### Exploring "Issued" IPC via MFMA operations + +```{warning} +The MFMA assembly operations used in this example are inherently unportable to older CDNA architectures. +``` + +Unlike the simple quad-cycle `v_mov_b32` operation discussed in our [previous example](VALU_ipc), some operations take many quad-cycles to execute. +For example, using the [AMD Matrix Instruction Calculator](https://github.com/RadeonOpenCompute/amd_matrix_instruction_calculator#example-of-querying-instruction-information) we can see that some [MFMA](mfma) operations take 64 cycles, e.g.: + +```shell-session +$ ./matrix_calculator.py --arch CDNA2 --detail-instruction --instruction v_mfma_f32_32x32x8bf16_1k +Architecture: CDNA2 +Instruction: V_MFMA_F32_32X32X8BF16_1K +<...> + Execution statistics: + FLOPs: 16384 + Execution cycles: 64 + FLOPs/CU/cycle: 1024 + Can co-execute with VALU: True + VALU co-execution cycles possible: 60 +``` + +What happens to our IPC when we utilize this `v_mfma_f32_32x32x8bf16_1k` instruction on a CDNA2 accelerator? 
+To find out, we turn to our `mfma` kernel in the IPC example: + +```shell-session +$ omniperf analyze -p workloads/ipc/mi200/ --dispatch 8 -b 11.2 --decimal 4 +<...> +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤═══════════════════════════════╤═════════╤═════════════════╤═════════════════╤═════════════════╤══════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═══════════════════════════════╪═════════╪═════════════════╪═════════════════╪═════════════════╪══════════╡ +│ 0 │ void mfma<1000>() [clone .kd] │ 1.0000 │ 1623167595.0000 │ 1623167595.0000 │ 1623167595.0000 │ 100.0000 │ +╘════╧═══════════════════════════════╧═════════╧═════════════════╧═════════════════╧═════════════════╧══════════╛ + + +-------------------------------------------------------------------------------- +11. Compute Units - Compute Pipeline +11.2 Pipeline Stats +╒═════════╤═════════════════════╤═════════╤═════════╤═════════╤══════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════╪═════════╪═════════╪═════════╪══════════════╡ +│ 11.2.0 │ IPC │ 0.0626 │ 0.0626 │ 0.0626 │ Instr/cycle │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.1 │ IPC (Issued) │ 1.0000 │ 1.0000 │ 1.0000 │ Instr/cycle │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.2 │ SALU Util │ 0.0000 │ 0.0000 │ 0.0000 │ Pct │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.3 │ VALU Util │ 6.2496 │ 6.2496 │ 6.2496 │ Pct │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.4 │ VMEM Util │ 0.0000 │ 0.0000 │ 0.0000 │ Pct │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.5 │ Branch Util │ 0.0062 │ 0.0062 │ 0.0062 │ Pct │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.6 │ VALU Active 
Threads │ 64.0000 │ 64.0000 │ 64.0000 │ Threads │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.7 │ MFMA Util │ 99.9939 │ 99.9939 │ 99.9939 │ Pct │ +├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ +│ 11.2.8 │ MFMA Instr Cycles │ 64.0000 │ 64.0000 │ 64.0000 │ Cycles/instr │ +╘═════════╧═════════════════════╧═════════╧═════════╧═════════╧══════════════╛ +``` + +In contrast to our [VALU IPC example](VALU_ipc), we now see that the IPC metric (11.2.0) and Issued IPC (11.2.1) metric differ substantially. +First, we see the VALU utilization (11.2.3) has decreased substantially, from nearly 100% to $\sim6.25\%$. +We note that this matches the ratio of: + +```math +((Execution\ cycles) - (VALU\ coexecution\ cycles)) / (Execution\ cycles) +``` +reported by the matrix calculator, while the MFMA utilization (11.2.7) has increased to nearly 100%. + + +Recall: our `v_mfma_f32_32x32x8bf16_1k` instruction takes 64 cycles to execute, or 16 quad-cycles, matching our observed MFMA Instruction Cycles (11.2.8). +That is, we have a single instruction executed every 16 quad-cycles, or: + +```math +1/16 = 0.0625 +``` + +which is almost identical to our IPC metric (11.2.0). +Why, then, is the Issued IPC metric (11.2.1) equal to 1.0? + +Instead of simply counting the number of instructions issued and dividing by the number of cycles the [CUs](CU) on the accelerator were active (as is done for 11.2.0), this metric is formulated differently, and instead counts the number of (non-[internal](Internal_ipc)) instructions issued divided by the number of (quad-) cycles where the [scheduler](scheduler) was actively working on issuing instructions. +Thus the Issued IPC metric (11.2.1) gives more of a sense of "what percent of the total number of [scheduler](scheduler) cycles did a wave schedule an instruction?"
while the IPC metric (11.2.0) indicates the ratio of the number of instructions executed over the total [active CU cycles](TotalActiveCUCycles). + +```{warning} +There are further subtleties of the Issued IPC metric (11.2.1) that complicate its use. +We will explore these in the [subsequent section](Internal_ipc). +For these reasons, Omniperf typically promotes use of the regular IPC metric (11.2.0), e.g., in the top-level Speed-of-Light chart. +``` + +(Internal_ipc)= +### "Internal" instructions and IPC + +Next, we explore the concept of an "internal" instruction. +From [Layla Mah's GCN Crash Course](https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah) (slide 29), we see a few candidates for internal instructions, and we choose a `s_nop` instruction, which according to the [CDNA2 ISA Guide](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf): + +>Does nothing; it can be repeated in hardware up to eight times. + +Here we choose to use a no-op of: + +```asm +s_nop 0x0 +``` + +to make our point. Running this kernel through Omniperf yields: + +```shell-session +$ omniperf analyze -p workloads/ipc/mi200/ --dispatch 9 -b 11.2 +<...> +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ +│ 0 │ void snop<1000>() [clone .kd] │ 1.00 │ 14221851.50 │ 14221851.50 │ 14221851.50 │ 100.00 │ +╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +11.
Compute Units - Compute Pipeline +11.2 Pipeline Stats +╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ +│ 11.2.0 │ IPC │ 6.79 │ 6.79 │ 6.79 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.2 │ SALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.3 │ VALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.5 │ Branch Util │ 0.68 │ 0.68 │ 0.68 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.6 │ VALU Active Threads │ │ │ │ Threads │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ +╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ +``` + +First, we see that the IPC metric (11.2.0) tops our theoretical maximum of 5 instructions per cycle (discussed in the [scheduler](scheduler) section). +How can this be? + +Recall that Layla's slides say "no functional unit" for the internal instructions. +This removes the limitation on the IPC. If we are _only_ issuing internal instructions, we are not issuing to any execution units! +However, workloads such as these are almost _entirely_ artificial (i.e., repeatedly issuing internal instructions almost exclusively). In practice, a maximum IPC of 5 is expected in almost all cases.
+ +Secondly, we note that our "Issued" IPC (11.2.1) is still equal to one here. +Again, this has to do with the details of "internal" instructions. +Recall in our [previous example](Issued_ipc) we defined this metric as explicitly excluding internal instruction counts. +The logical question then is, 'what _is_ this metric counting in our `s_nop` kernel?' + +The generated assembly looks something like: + +```asm +;;#ASMSTART +s_nop 0x0 +;;#ASMEND +;;#ASMSTART +s_nop 0x0 +;;#ASMEND +;;<... omitting many more ...> +s_endpgm +.section .rodata,#alloc +.p2align 6, 0x0 +.amdhsa_kernel _Z4snopILi1000EEvv +``` + +Of particular interest here is the `s_endpgm` instruction, of which the [CDNA2 ISA guide](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf) states: + +>End of program; terminate wavefront. + +This is not on our list of internal instructions from Layla's tutorial, and is therefore counted as part of our Issued IPC (11.2.1). +Thus: the issued IPC being equal to one here indicates that we issued an `s_endpgm` instruction every cycle the [scheduler](scheduler) was active for non-internal instructions, which is expected as this was our _only_ non-internal instruction! + + +(SALU_ipc)= +### SALU Utilization + +Next, we explore a simple [SALU](salu) kernel in our ongoing IPC and utilization example. +For this case, we select a simple scalar move operation, e.g.: + +```asm +s_mov_b32 s0, s1 +``` + +which, in analogy to our [`v_mov`](VALU_ipc) example, copies the contents of the source scalar register (`s1`) to the destination scalar register (`s0`). +Running this kernel through Omniperf yields: + +```shell-session +$ omniperf analyze -p workloads/ipc/mi200/ --dispatch 10 -b 11.2 +<...> +-------------------------------------------------------------------------------- +0.
Top Stat +╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ +│ 0 │ void smov<1000>() [clone .kd] │ 1.00 │ 96246554.00 │ 96246554.00 │ 96246554.00 │ 100.00 │ +╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +11. Compute Units - Compute Pipeline +11.2 Pipeline Stats +╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ +│ 11.2.0 │ IPC │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.2 │ SALU Util │ 99.98 │ 99.98 │ 99.98 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.3 │ VALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.5 │ Branch Util │ 0.1 │ 0.1 │ 0.1 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.6 │ VALU Active Threads │ │ │ │ Threads │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ +╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ +``` + +Here we see that: + - both our IPC (11.2.0) and 
Issued IPC (11.2.1) are $\sim1.0$ as expected, and, + - the SALU Utilization (11.2.2) was nearly 100% as it was active for almost the entire kernel. + +(VALU_Active_Threads)= +### VALU Active Threads + +For our final IPC/Utilization example, we consider a slight modification of our [`v_mov`](VALU_ipc) example: + +```c++ +template +__global__ void vmov_with_divergence() { + if (threadIdx.x % 64 == 0) + vmov_op(); +} +``` + +That is, we wrap our [VALU](valu) operation inside a conditional where only one lane in our wavefront is active. +Running this kernel through Omniperf yields: + +```shell-session +$ omniperf analyze -p workloads/ipc/mi200/ --dispatch 11 -b 11.2 +<...> +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤══════════════════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪══════════════════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ +│ 0 │ void vmov_with_divergence<1000>() [clone │ 1.00 │ 97125097.00 │ 97125097.00 │ 97125097.00 │ 100.00 │ +│ │ .kd] │ │ │ │ │ │ +╘════╧══════════════════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +11. 
Compute Units - Compute Pipeline +11.2 Pipeline Stats +╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ +│ 11.2.0 │ IPC │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.2 │ SALU Util │ 0.1 │ 0.1 │ 0.1 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.3 │ VALU Util │ 99.98 │ 99.98 │ 99.98 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.5 │ Branch Util │ 0.2 │ 0.2 │ 0.2 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.6 │ VALU Active Threads │ 1.13 │ 1.13 │ 1.13 │ Threads │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ +├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ +│ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ +╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ +``` + +Here we see that once again, our VALU Utilization (11.2.3) is nearly 100%. +However, we note that the VALU Active Threads metric (11.2.6) is $\sim 1$, which matches our conditional in the source code. +So VALU Active Threads reports the average number of lanes of our wavefront that are active over all [VALU](valu) instructions, or thread "convergence" (i.e., 1 - [divergence](Divergence)). + +```{note} +We note here that: + +1. The act of evaluating a vector conditional in this example typically triggers VALU operations, contributing to why the VALU Active Threads metric is not identically one. 
+2. This metric is a time (cycle) averaged value, and thus contains an implicit dependence on the duration of various VALU instructions. + +Nonetheless, this metric serves as a useful measure of thread-convergence. +``` + +Finally, we note that our branch utilization (11.2.5) has increased slightly from our baseline, as we now have a branch (checking the value of `threadIdx.x`). + +## LDS Examples + +For this example, we consider the [LDS sample](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/lds.hip) distributed as a part of Omniperf. +This code contains two kernels to explore how both [LDS](lds) bandwidth and bank conflicts are calculated in Omniperf. + +This example was compiled and run on an MI250 accelerator using ROCm v5.6.0, and Omniperf v2.0.0. +```shell-session +$ hipcc -O3 lds.hip -o lds +``` + +Finally, we generate our Omniperf profile as: +```shell-session +$ omniperf profile -n lds --no-roof -- ./lds +``` + +(lds_bandwidth)= +### LDS Bandwidth + +To explore our 'theoretical LDS bandwidth' metric, we use a simple kernel: + +```c++ +constexpr unsigned max_threads = 256; +__global__ void load(int* out, int flag) { + __shared__ int array[max_threads]; + int index = threadIdx.x; + // fake a store to the LDS array to avoid unwanted behavior + if (flag) + array[max_threads - index] = index; + __syncthreads(); + int x = array[index]; + if (x == int(-1234567)) + out[threadIdx.x] = x; +} +``` + +Here we: + - Create an array of 256 integers in [LDS](lds) + - Fake a write to the LDS using the `flag` variable (always set to zero on the host) to avoid dead-code elimination + - Read a single integer per work-item from index `threadIdx.x` of the LDS array + - If the integer is equal to a magic number (always false), write the value out to global memory to, again, avoid dead-code elimination + +Finally, we launch this kernel repeatedly, varying the number of threads in our workgroup: + +```c++ +void bandwidth_demo(int N) { + for (int i = 1; i <= N; ++i) + 
load<<<1,i>>>(nullptr, 0); + hipDeviceSynchronize(); +} +``` + +Next, let's analyze the first of our bandwidth kernel dispatches: + +```shell-session +$ omniperf analyze -p workloads/lds/mi200/ -b 12.2.1 --dispatch 0 -n per_kernel +<...> +12. Local Data Share (LDS) +12.2 LDS Stats +╒═════════╤═══════════════════════╤════════╤════════╤════════╤══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪═══════════════════════╪════════╪════════╪════════╪══════════════════╡ +│ 12.2.1 │ Theoretical Bandwidth │ 256.00 │ 256.00 │ 256.00 │ Bytes per kernel │ +╘═════════╧═══════════════════════╧════════╧════════╧════════╧══════════════════╛ +``` + +Here we see that our Theoretical Bandwidth metric (12.2.1) is reporting 256 Bytes were loaded even though we launched a single work-item workgroup, and thus only loaded a single integer from LDS. Why is this? + +Recall our definition of this metric: + +> Indicates the maximum amount of bytes that could have been loaded from/stored to/atomically updated in the LDS per [normalization-unit](normunit). + +Here we see that this instruction _could_ have loaded up to 256 bytes of data (4 bytes for each work-item in the wavefront), and therefore this is the expected value for this metric in Omniperf, hence why this metric is named the "theoretical" bandwidth. + +To further illustrate this point we plot the relationship of the theoretical bandwidth metric (12.2.1) as compared to the effective (or achieved) bandwidth of this kernel, varying the number of work-items launched from 1 to 256: + +```{figure} images/ldsbandwidth.* +:scale: 50 % +:alt: Comparison of effective bandwidth versus the theoretical bandwidth metric in Omniperf for our simple example. +:align: center + +Comparison of effective bandwidth versus the theoretical bandwidth metric in Omniperf for our simple example. +``` + +Here we see that the theoretical bandwidth metric follows a step-function. 
It increases only when another wavefront issues an LDS instruction for up to 256 bytes of data. Such increases are marked in the plot using dashed lines. +In contrast, the effective bandwidth increases linearly, by 4 bytes, with the number of work-items in the kernel, N. + +(lds_bank_conflicts)= +### Bank Conflicts + +Next we explore bank conflicts using a slight modification of our bandwidth kernel: + +```c++ +constexpr unsigned nbanks = 32; +__global__ void conflicts(int* out, int flag) { + constexpr unsigned nelements = nbanks * max_threads; + __shared__ int array[nelements]; + // each thread reads from the same bank + int index = threadIdx.x * nbanks; + // fake a store to the LDS array to avoid unwanted behavior + if (flag) + array[max_threads - index] = index; + __syncthreads(); + int x = array[index]; + if (x == int(-1234567)) + out[threadIdx.x] = x; +} +``` + +Here we: + - Allocate an [LDS](lds) array of size $32*256*4{B}=32{KiB}$ + - Fake a write to the LDS using the `flag` variable (always set to zero on the host) to avoid dead-code elimination + - Read a single integer per work-item from index `threadIdx.x * nbanks` of the LDS array + - If the integer is equal to a magic number (always false), write the value out to global memory to, again, avoid dead-code elimination. + +On the host, we again repeatedly launch this kernel, varying the number of work-items: + +```c++ +void conflicts_demo(int N) { + for (int i = 1; i <= N; ++i) + conflicts<<<1,i>>>(nullptr, 0); + hipDeviceSynchronize(); +} +``` + +Analyzing our first `conflicts` kernel (i.e., a single work-item), we see: + +```shell-session +$ omniperf analyze -p workloads/lds/mi200/ -b 12.2.4 12.2.6 --dispatch 256 -n per_kernel +<...> +-------------------------------------------------------------------------------- +12. 
Local Data Share (LDS) +12.2 LDS Stats +╒═════════╤════════════════╤═══════╤═══════╤═══════╤═══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════╪═══════╪═══════╪═══════╪═══════════════════╡ +│ 12.2.4 │ Index Accesses │ 2.00 │ 2.00 │ 2.00 │ Cycles per kernel │ +├─────────┼────────────────┼───────┼───────┼───────┼───────────────────┤ +│ 12.2.6 │ Bank Conflict │ 0.00 │ 0.00 │ 0.00 │ Cycles per kernel │ +╘═════════╧════════════════╧═══════╧═══════╧═══════╧═══════════════════╛ +``` + +In our [previous example](lds_bandwidth), we showed how a load from a single work-item is considered to have a theoretical bandwidth of 256B. +Recall, the [LDS](lds) can load up to $128B$ per cycle (i.e., 32 banks x 4B / bank / cycle). +Hence, we see that loading a 4B integer spends two cycles accessing the LDS ($2\ {cycle} = (256B) / (128\ B/{cycle})$). + +Looking at the next `conflicts` dispatch (i.e., two work-items) yields: + +```shell-session +$ omniperf analyze -p workloads/lds/mi200/ -b 12.2.4 12.2.6 --dispatch 257 -n per_kernel +<...> +-------------------------------------------------------------------------------- +12. Local Data Share (LDS) +12.2 LDS Stats +╒═════════╤════════════════╤═══════╤═══════╤═══════╤═══════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════╪═══════╪═══════╪═══════╪═══════════════════╡ +│ 12.2.4 │ Index Accesses │ 3.00 │ 3.00 │ 3.00 │ Cycles per kernel │ +├─────────┼────────────────┼───────┼───────┼───────┼───────────────────┤ +│ 12.2.6 │ Bank Conflict │ 1.00 │ 1.00 │ 1.00 │ Cycles per kernel │ +╘═════════╧════════════════╧═══════╧═══════╧═══════╧═══════════════════╛ +``` + +Here we see a bank conflict! What happened? + +Recall that the index for each thread was calculated as: + +```c++ +int index = threadIdx.x * nbanks; +``` + +Or, precisely 32 elements, and each element is 4B wide (for a standard integer). 
+That is, each thread strides back to the same bank in the LDS, such that each work-item we add to the dispatch results in another bank conflict! + +Recalling our discussion of bank conflicts in our [LDS](lds) description: + +>A bank conflict occurs when two (or more) work-items in a wavefront want to read, write, or atomically update different addresses that map to the same bank in the same cycle. +In this case, the conflict detection hardware will determine a new schedule such that the **access is split into multiple cycles with no conflicts in any single cycle.** + +Here we see the conflict resolution hardware in action! Because we have engineered our kernel to generate conflicts, we expect our bank conflict metric to scale linearly with the number of work-items: + +```{figure} images/ldsconflicts.* +:scale: 50 % +:alt: Comparison of LDS conflict cycles versus access cycles for our simple example. +:align: center + +Comparison of LDS conflict cycles versus access cycles for our simple example. +``` + +Here we show the comparison of the Index Accesses (12.2.4) to the Bank Conflicts (12.2.6) for the first 20 kernel invocations. +We see that each grows linearly, and there is a constant gap of 2 cycles between them (i.e., the first access is never considered a conflict). + + +Finally, we can use these two metrics to derive the Bank Conflict Rate (12.1.4). Since within an Index Access we have 32 banks that may need to be updated, we use: + +$$ +Bank\ Conflict\ Rate = 100 * ((Bank\ Conflicts / 32) / (Index\ Accesses - Bank\ Conflicts)) +$$ + +Plotting this, we see: + +```{figure} images/ldsconflictrate.* +:scale: 50 % +:alt: LDS Bank Conflict rate for our simple example. +:align: center + +LDS Bank Conflict rate for our simple example. +``` + +The bank conflict rate linearly increases with the number of work-items within a wavefront that are active, _approaching_ 100\%, but never quite reaching it. 
+ + +(Occupancy_example)= +## Occupancy Limiters Example + + +In this [example](https://github.com/ROCm/omniperf/blob/amd-mainline/sample/occupancy.hip), we will investigate the use of the resource allocation panel in the [Workgroup Manager](SPI)'s metrics section to determine occupancy limiters. +This code contains several kernels to explore how various kernel resources impact achieved occupancy, and how this is reported in Omniperf. + +This example was compiled and run on an MI250 accelerator using ROCm v5.6.0, and Omniperf v2.0.0: +```shell-session +$ hipcc -O3 occupancy.hip -o occupancy --save-temps +``` +We have again included the `--save-temps` flag to get the corresponding assembly. + +Finally, we generate our Omniperf profile as: +```shell-session +$ omniperf profile -n occupancy --no-roof -- ./occupancy +``` + +(Occupancy_experiment_design)= +### Design note + +For our occupancy test, we need to create a kernel that is resource heavy, in various ways. +For this purpose, we use the following (somewhat funny-looking) kernel: + +```c++ +constexpr int bound = 16; +__launch_bounds__(256) +__global__ void vgprbound(int N, double* ptr) { + double intermediates[bound]; + for (int i = 0 ; i < bound; ++i) intermediates[i] = N * threadIdx.x; + double x = ptr[threadIdx.x]; + for (int i = 0; i < 100; ++i) { + x += sin(pow(__shfl(x, i % warpSize) * intermediates[(i - 1) % bound], intermediates[i % bound])); + intermediates[i % bound] = x; + } + if (x == N) ptr[threadIdx.x] = x; +} +``` + +Here we try to use as many [VGPRs](valu) as possible, to this end: + - We create a small array of double precision floats, that we size to try to fit into registers (i.e., `bound`, this may need to be tuned depending on the ROCm version). + - We specify `__launch_bounds__(256)` to increase the number of VGPRs available to the kernel (by limiting the number of wavefronts that can be resident on a [CU](CU)). + - Write a unique non-compile time constant to each element of the array. 
+ - Repeatedly permute and call relatively expensive math functions on our array elements. + - Keep the compiler from optimizing out any operations by faking a write to the `ptr` based on a run-time conditional. + +This yields a total of 122 VGPRs, but it is expected this number will depend on the exact ROCm/compiler version. + +```asm + .size _Z9vgprboundiPd, .Lfunc_end1-_Z9vgprboundiPd + ; -- End function + .section .AMDGPU.csdata +; Kernel info: +; codeLenInByte = 4732 +; NumSgprs: 68 +; NumVgprs: 122 +; NumAgprs: 0 +; <...> +; AccumOffset: 124 +``` + +We will use various permutations of this kernel to limit occupancy, and more importantly for the purposes of this example, demonstrate how this is reported in Omniperf. + +(VGPR_occupancy)= +### VGPR Limited + +For our first test, we use the `vgprbound` kernel discussed in the [design note](Occupancy_experiment_design). +After profiling, we run the analyze step on this kernel: + +```shell-session +$ omniperf analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 --dispatch 1 +<...> +-------------------------------------------------------------------------------- +0. Top Stat +╒════╤═════════════════════════╤═════════╤══════════════╤══════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═════════════════════════╪═════════╪══════════════╪══════════════╪══════════════╪════════╡ +│ 0 │ vgprbound(int, double*) │ 1.00 │ 923093822.50 │ 923093822.50 │ 923093822.50 │ 100.00 │ +╘════╧═════════════════════════╧═════════╧══════════════╧══════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +2. 
System Speed-of-Light +2.1 Speed-of-Light +╒═════════╤═════════════════════╤═════════╤════════════╤═════════╤═══════════════╕ +│ Index │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ +╞═════════╪═════════════════════╪═════════╪════════════╪═════════╪═══════════════╡ +│ 2.1.15 │ Wavefront Occupancy │ 1661.24 │ Wavefronts │ 3328.00 │ 49.92 │ +╘═════════╧═════════════════════╧═════════╧════════════╧═════════╧═══════════════╛ + + +-------------------------------------------------------------------------------- +6. Workgroup Manager (SPI) +6.2 Workgroup Manager - Resource Allocation +╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡ +│ 6.2.0 │ Not-scheduled Rate (Workgroup Manager) │ 0.64 │ 0.64 │ 0.64 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.1 │ Not-scheduled Rate (Scheduler-Pipe) │ 24.94 │ 24.94 │ 24.94 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.2 │ Scheduler-Pipe Stall Rate │ 24.49 │ 24.49 │ 24.49 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.3 │ Scratch Stall Rate │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.4 │ Insufficient SIMD Waveslots │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.5 │ Insufficient SIMD VGPRs │ 94.90 │ 94.90 │ 94.90 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.6 │ Insufficient SIMD SGPRs │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.7 │ Insufficient CU LDS │ 0.00 │ 0.00 │ 0.00 │ Pct │ 
+├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.8 │ Insufficient CU Barriers │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.9 │ Reached CU Workgroup Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.10 │ Reached CU Wavefront Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛ + + +-------------------------------------------------------------------------------- +7. Wavefront +7.1 Wavefront Launch Stats +╒═════════╤══════════╤════════╤════════╤════════╤═══════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪══════════╪════════╪════════╪════════╪═══════════╡ +│ 7.1.5 │ VGPRs │ 124.00 │ 124.00 │ 124.00 │ Registers │ +├─────────┼──────────┼────────┼────────┼────────┼───────────┤ +│ 7.1.6 │ AGPRs │ 4.00 │ 4.00 │ 4.00 │ Registers │ +├─────────┼──────────┼────────┼────────┼────────┼───────────┤ +│ 7.1.7 │ SGPRs │ 80.00 │ 80.00 │ 80.00 │ Registers │ +╘═════════╧══════════╧════════╧════════╧════════╧═══════════╛ +``` + +Here we see that the kernel indeed does use _around_ (but not exactly) 122 VGPRs, with the difference due to granularity of VGPR allocations. +In addition, we see that we have allocated 4 "[AGPRs](agprs)". +We note that on current CDNA2 accelerators, the `AccumOffset` field of the assembly metadata: +```asm +; AccumOffset: 124 +``` +denotes the divide between `VGPRs` and `AGPRs`. + + +Next, we examine our wavefront occupancy (2.1.15), and see that we are reaching only $\sim50\%$ of peak occupancy. +As a result, we see that: + - We are not scheduling workgroups $\sim25\%$ of [total scheduler-pipe cycles](TotalPipeCycles) (6.2.1); recall from the discussion of the [Workgroup manager](SPI), 25\% is the maximum. 
+ - The scheduler-pipe is stalled (6.2.2) from scheduling workgroups due to resource constraints for the same $\sim25\%$ of the time. + - And finally, $\sim95\%$ of those stalls are due to a lack of SIMDs with the appropriate number of VGPRs available (6.2.5). + +That is, the reason we can't reach full occupancy is due to our VGPR usage, as expected! + +### LDS Limited + +To examine an LDS limited example, we must change our kernel slightly: + +```c++ +constexpr size_t fully_allocate_lds = 64ul * 1024ul / sizeof(double); +__launch_bounds__(256) +__global__ void ldsbound(int N, double* ptr) { + __shared__ double intermediates[fully_allocate_lds]; + for (int i = threadIdx.x ; i < fully_allocate_lds; i += blockDim.x) intermediates[i] = N * threadIdx.x; + __syncthreads(); + double x = ptr[threadIdx.x]; + for (int i = threadIdx.x; i < fully_allocate_lds; i += blockDim.x) { + x += sin(pow(__shfl(x, i % warpSize) * intermediates[(i - 1) % fully_allocate_lds], intermediates[i % fully_allocate_lds])); + __syncthreads(); + intermediates[i % fully_allocate_lds] = x; + } + if (x == N) ptr[threadIdx.x] = x; +} +``` + +where we now: + - allocate a 64 KiB LDS array per workgroup, and + - use our allocated LDS array instead of a register array + +Analyzing this: + +```shell-session +$ omniperf analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 7.1.8 --dispatch 3 +<...> +-------------------------------------------------------------------------------- +2. System Speed-of-Light +2.1 Speed-of-Light +╒═════════╤═════════════════════╤════════╤════════════╤═════════╤═══════════════╕ +│ Index │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ +╞═════════╪═════════════════════╪════════╪════════════╪═════════╪═══════════════╡ +│ 2.1.15 │ Wavefront Occupancy │ 415.52 │ Wavefronts │ 3328.00 │ 12.49 │ +╘═════════╧═════════════════════╧════════╧════════════╧═════════╧═══════════════╛ + + +-------------------------------------------------------------------------------- +6. 
Workgroup Manager (SPI) +6.2 Workgroup Manager - Resource Allocation +╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡ +│ 6.2.0 │ Not-scheduled Rate (Workgroup Manager) │ 0.13 │ 0.13 │ 0.13 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.1 │ Not-scheduled Rate (Scheduler-Pipe) │ 24.87 │ 24.87 │ 24.87 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.2 │ Scheduler-Pipe Stall Rate │ 24.84 │ 24.84 │ 24.84 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.3 │ Scratch Stall Rate │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.4 │ Insufficient SIMD Waveslots │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.5 │ Insufficient SIMD VGPRs │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.6 │ Insufficient SIMD SGPRs │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.7 │ Insufficient CU LDS │ 96.47 │ 96.47 │ 96.47 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.8 │ Insufficient CU Barriers │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.9 │ Reached CU Workgroup Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.10 │ Reached CU Wavefront Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛ + + 
+-------------------------------------------------------------------------------- +7. Wavefront +7.1 Wavefront Launch Stats +╒═════════╤════════════════╤══════════╤══════════╤══════════╤═══════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════╪══════════╪══════════╪══════════╪═══════════╡ +│ 7.1.5 │ VGPRs │ 96.00 │ 96.00 │ 96.00 │ Registers │ +├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤ +│ 7.1.6 │ AGPRs │ 0.00 │ 0.00 │ 0.00 │ Registers │ +├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤ +│ 7.1.7 │ SGPRs │ 80.00 │ 80.00 │ 80.00 │ Registers │ +├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤ +│ 7.1.8 │ LDS Allocation │ 65536.00 │ 65536.00 │ 65536.00 │ Bytes │ +╘═════════╧════════════════╧══════════╧══════════╧══════════╧═══════════╛ +``` + +We see that our VGPR allocation has gone down to 96 registers, but now we see our 64KiB LDS allocation (7.1.8). +In addition, we see a similar non-schedule rate (6.2.1) and stall rate (6.2.2) as in our [VGPR example](VGPR_occupancy). However, our occupancy limiter has now shifted from VGPRs (6.2.5) to LDS (6.2.7). + + +We note that although we see around the same scheduler/stall rates (with our LDS limiter), our wave occupancy (2.1.15) is significantly lower ($\sim12\%$)! +This is important to remember: the occupancy limiter metrics in the resource allocation section tell you what the limiter was, but _not_ how much the occupancy was limited. +These metrics should always be analyzed in concert with the wavefront occupancy metric! 
+ +### SGPR Limited + +Finally, we modify our kernel once more to make it limited by [SGPRs](salu): + +```c++ +constexpr int sgprlim = 1; +__launch_bounds__(1024, 8) +__global__ void sgprbound(int N, double* ptr) { + double intermediates[sgprlim]; + for (int i = 0 ; i < sgprlim; ++i) intermediates[i] = i; + double x = ptr[0]; + #pragma unroll 1 + for (int i = 0; i < 100; ++i) { + x += sin(pow(intermediates[(i - 1) % sgprlim], intermediates[i % sgprlim])); + intermediates[i % sgprlim] = x; + } + if (x == N) ptr[0] = x; +} +``` + +The major changes here are to: + - make as much as possible provably uniform across the wave (notice the lack of `threadIdx.x` in the `intermediates` initialization and elsewhere), + - addition of `__launch_bounds__(1024, 8)`, which reduces our maximum VGPRs to 64 (such that 8 waves can fit per SIMD), but causes some register spills (i.e., [Scratch](Mspace) usage), and + - lower the `bound` (here we use `sgprlim`) of the array to reduce VGPR/Scratch usage + +This results in the following assembly metadata for this kernel: +```asm + .size _Z9sgprboundiPd, .Lfunc_end3-_Z9sgprboundiPd + ; -- End function + .section .AMDGPU.csdata +; Kernel info: +; codeLenInByte = 4872 +; NumSgprs: 76 +; NumVgprs: 64 +; NumAgprs: 0 +; TotalNumVgprs: 64 +; ScratchSize: 60 +; <...> +; AccumOffset: 64 +; Occupancy: 8 +``` + +Analyzing this workload yields: + +```shell-session +$ omniperf analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 7.1.8 7.1.9 --dispatch 5 +<...> +-------------------------------------------------------------------------------- +0. 
Top Stat +╒════╤═════════════════════════╤═════════╤══════════════╤══════════════╤══════════════╤════════╕ +│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ +╞════╪═════════════════════════╪═════════╪══════════════╪══════════════╪══════════════╪════════╡ +│ 0 │ sgprbound(int, double*) │ 1.00 │ 782069812.00 │ 782069812.00 │ 782069812.00 │ 100.00 │ +╘════╧═════════════════════════╧═════════╧══════════════╧══════════════╧══════════════╧════════╛ + + +-------------------------------------------------------------------------------- +2. System Speed-of-Light +2.1 Speed-of-Light +╒═════════╤═════════════════════╤═════════╤════════════╤═════════╤═══════════════╕ +│ Index │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ +╞═════════╪═════════════════════╪═════════╪════════════╪═════════╪═══════════════╡ +│ 2.1.15 │ Wavefront Occupancy │ 3291.76 │ Wavefronts │ 3328.00 │ 98.91 │ +╘═════════╧═════════════════════╧═════════╧════════════╧═════════╧═══════════════╛ + + +-------------------------------------------------------------------------------- +6. 
Workgroup Manager (SPI) +6.2 Workgroup Manager - Resource Allocation +╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡ +│ 6.2.0 │ Not-scheduled Rate (Workgroup Manager) │ 7.72 │ 7.72 │ 7.72 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.1 │ Not-scheduled Rate (Scheduler-Pipe) │ 15.17 │ 15.17 │ 15.17 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.2 │ Scheduler-Pipe Stall Rate │ 7.38 │ 7.38 │ 7.38 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.3 │ Scratch Stall Rate │ 39.76 │ 39.76 │ 39.76 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.4 │ Insufficient SIMD Waveslots │ 26.32 │ 26.32 │ 26.32 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.5 │ Insufficient SIMD VGPRs │ 26.32 │ 26.32 │ 26.32 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.6 │ Insufficient SIMD SGPRs │ 25.52 │ 25.52 │ 25.52 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.7 │ Insufficient CU LDS │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.8 │ Insufficient CU Barriers │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.9 │ Reached CU Workgroup Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ +├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ +│ 6.2.10 │ Reached CU Wavefront Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ +╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛ + + 
+-------------------------------------------------------------------------------- +7. Wavefront +7.1 Wavefront Launch Stats +╒═════════╤════════════════════╤═══════╤═══════╤═══════╤════════════════╕ +│ Index │ Metric │ Avg │ Min │ Max │ Unit │ +╞═════════╪════════════════════╪═══════╪═══════╪═══════╪════════════════╡ +│ 7.1.5 │ VGPRs │ 64.00 │ 64.00 │ 64.00 │ Registers │ +├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ +│ 7.1.6 │ AGPRs │ 0.00 │ 0.00 │ 0.00 │ Registers │ +├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ +│ 7.1.7 │ SGPRs │ 80.00 │ 80.00 │ 80.00 │ Registers │ +├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ +│ 7.1.8 │ LDS Allocation │ 0.00 │ 0.00 │ 0.00 │ Bytes │ +├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ +│ 7.1.9 │ Scratch Allocation │ 60.00 │ 60.00 │ 60.00 │ Bytes/workitem │ +╘═════════╧════════════════════╧═══════╧═══════╧═══════╧════════════════╛ +``` + +Here we see that our wavefront launch stats (7.1) have changed to reflect the metadata seen in the `--save-temps` output. +Of particular interest, we see: + - The SGPR allocation (7.1.7) is 80 registers, slightly more than the 76 requested by the compiler due to allocation granularity, and + - We have a ['scratch'](Mspace) i.e., private memory, allocation of 60 bytes per work-item + +Analyzing the resource allocation block (6.2) we now see that for the first time, the 'Not-scheduled Rate (Workgroup Manager)' metric (6.2.0) has become non-zero. This is because the workgroup manager is responsible for management of scratch, which we see also contributes to our occupancy limiters in the 'Scratch Stall Rate' (6.2.3). 
We note that the sum of the workgroup manager not-scheduled rate and the scheduler-pipe not-scheduled rate is still $\sim25\%$, as in our previous examples. + +Next, we see that the scheduler-pipe stall rate (6.2.2), i.e., how often we could not schedule a workgroup to a CU, was only $\sim8\%$. +This hints that perhaps, our kernel is not _particularly_ occupancy limited by resources, and indeed checking the wave occupancy metric (2.1.15) shows that this kernel is reaching nearly 99% occupancy! + +Finally, we inspect the occupancy limiter metrics and see a roughly even split between [waveslots](valu) (6.2.4), [VGPRs](valu) (6.2.5), and [SGPRs](salu) (6.2.6) along with the scratch stalls (6.2.3) previously mentioned. + +This is yet another reminder to view occupancy holistically. +While these metrics tell you why a workgroup cannot be scheduled, they do _not_ tell you what your occupancy was (consult wavefront occupancy) _nor_ whether increasing occupancy will be beneficial to performance. diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md new file mode 100644 index 0000000000..9c699ec7a9 --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md @@ -0,0 +1,356 @@ +# Profile Mode + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 5 +``` + +The [Omniperf](https://github.com/ROCm/omniperf) repository +includes source code for a sample GPU compute workload, +__vcopy.cpp__. A copy of this file is available in the `share/sample` +subdirectory after a normal Omniperf installation, or via the +`$OMNIPERF_SHARE/sample` directory when using the supplied modulefile. + +A compiled version of this workload is used throughout the following +sections to demonstrate the use of Omniperf in MI GPU performance +analysis. Unless otherwise noted, the performance analysis is done on +the MI200 platform.
+ +## Workload Compilation +**vcopy compilation:** +```shell-session +$ hipcc vcopy.cpp -o vcopy +$ ls +vcopy vcopy.cpp +$ ./vcopy -n 1048576 -b 256 +vcopy testing on GCD 0 +Finished allocating vectors on the CPU +Finished allocating vectors on the GPU +Finished copying vectors to the GPU +sw thinks it moved 1.000000 KB per wave +Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384: +Launching the kernel on the GPU +Finished executing kernel +Finished copying the output vector from the GPU to the CPU +Releasing GPU memory +Releasing CPU memory +``` + +## Omniperf Profiling +The *omniperf* executable, available through the Omniperf repository, is used to acquire all necessary performance monitoring data through analysis of compute workloads. + +### Features + +- __Automate counter collection__: Omniperf handles all of your profiling via preconfigured input files. +- __Filtering__: Apply runtime filters to speed up the profiling process. +- __Standalone Roofline__: Isolate a subset of built-in metrics or build your own profiling configuration. + +Run `omniperf profile -h` for more details. + +### Demo + +The following sample command profiles the *vcopy* workload. 
+ +**vcopy profiling:** +```shell-session +$ omniperf profile --name vcopy -- ./vcopy -n 1048576 -b 256 + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +Omniperf version: 2.0.0 +Profiler choice: rocprofv1 +Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +Target: MI200 +Command: ./vcopy -n 1048576 -b 256 +Kernel Selection: None +Dispatch Selection: None +Hardware Blocks: All + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting Performance Counters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[profiling] Current input file: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200/perfmon/SQ_IFETCH_LEVEL.txt + |-> [rocprof] RPL: on '240312_174329' from '/opt/rocm-5.2.1' in '/home/auser/repos/omniperf/src/omniperf' + |-> [rocprof] RPL: profiling '""./vcopy -n 1048576 -b 256""' + |-> [rocprof] RPL: input file '/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/perfmon/SQ_IFETCH_LEVEL.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_240312_174329_692890' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_240312_174329_692890/input0_results_240312_174329' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_240312_174329_692890/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 6 metrics + |-> [rocprof] GRBM_COUNT, GRBM_GUI_ACTIVE, SQ_WAVES, SQ_IFETCH, SQ_IFETCH_LEVEL, SQ_ACCUM_PREV_HIRES + |-> [rocprof] vcopy testing on GCD 0 + |-> [rocprof] Finished allocating vectors on the CPU + |-> [rocprof] Finished allocating vectors on the GPU + |-> [rocprof] Finished copying vectors to the GPU + |-> [rocprof] sw thinks it moved 1.000000 KB per wave + |-> [rocprof] Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384: + |-> [rocprof] Launching the kernel on the GPU + |-> [rocprof] Finished executing kernel + |-> [rocprof] Finished copying the output 
vector from the GPU to the CPU + |-> [rocprof] Releasing GPU memory + |-> [rocprof] Releasing CPU memory + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 1 contexts collected, output directory /tmp/rpl_data_240312_174329_692890/input0_results_240312_174329 + |-> [rocprof] File '/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_IFETCH_LEVEL.csv' is generating + |-> [rocprof] +[profiling] Current input file: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200/perfmon/SQ_INST_LEVEL_LDS.txt + +... + +[roofline] Checking for roofline.csv in /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +[roofline] No roofline data found. Generating... +Empirical Roofline Calculation +Copyright © 2022 Advanced Micro Devices, Inc. All rights reserved. +Total detected GPU devices: 4 +GPU Device 0: Profiling... + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +HBM BW, GPU ID: 0, workgroupSize:256, workgroups:2097152, experiments:100, traffic:8589934592 bytes, duration:6.2 ms, mean:1388.0 GB/sec, stdev=3.1 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +L2 BW, GPU ID: 0, workgroupSize:256, workgroups:8192, experiments:100, traffic:687194767360 bytes, duration:136.5 ms, mean:5020.8 GB/sec, stdev=16.5 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +L1 BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:26843545600 bytes, duration:2.9 ms, mean:9229.5 GB/sec, stdev=2.9 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +LDS BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:33554432000 bytes, duration:1.9 ms, mean:17645.6 GB/sec, stdev=20.1 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak FLOPs (FP32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:274877906944, duration:13.078 ms, mean:20986.9 GFLOPS, stdev=310.8 GFLOPS + 99% 
[||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak FLOPs (FP64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:137438953472, duration:6.7 ms, mean:20408.029297.1 GFLOPS, stdev=2.7 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (BF16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:12.6 ms, mean:170280.0 GFLOPS, stdev=22.3 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:13.0 ms, mean:164733.6 GFLOPS, stdev=24.3 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:536870912000, duration:13.0 ms, mean:41399.6 GFLOPS, stdev=4.1 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA FLOPs (F64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:268435456000, duration:6.5 ms, mean:41379.2 GFLOPS, stdev=4.4 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] +Peak MFMA IOPs (I8), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, IOP:2147483648000, duration:12.9 ms, mean:166281.9 GOPS, stdev=2495.9 GOPS +GPU Device 1: Profiling... +... +GPU Device 2: Profiling... +... +GPU Device 3: Profiling... +... +``` + +```{tip} +To reduce verbosity of profiling output try the `--quiet` flag which will hide rocprofiler output and activate a progress bar. +``` + +You will notice two main stages in *default* Omniperf profiling. + +1. The first stage collects all the counters needed for Omniperf analysis (omitting any filters you have provided). + +2. 
The second stage collects data for the roofline analysis (this stage can be disabled using `--no-roof`) + +In this document, we use the term System on Chip (SoC) to refer to a particular family of accelerators. At the end of profiling, all resulting csv files should be located in a SoC specific target directory, e.g.: + - "MI300A" or "MI300X" for the AMD Instinct (tm) MI300 family of accelerators + - "MI200" for the AMD Instinct (tm) MI200 family of accelerators + - "MI100" for the AMD Instinct (tm) MI100 family of accelerators + - etc. + + The SoC names are generated as a part of Omniperf, and do not _always_ distinguish between different accelerators in the same family (e.g., an AMD Instinct (tm) MI210 vs an MI250) + +```{note} +Additionally, you will notice a few extra files. An SoC parameters file, *sysinfo.csv*, is created to reflect the target device settings. All profiling output is stored in *log.txt*. Roofline specific benchmark results are stored in *roofline.csv*. +``` + +```shell-session +$ ls workloads/vcopy/MI200/ +total 112 +total 60 +-rw-r--r-- 1 auser agroup 27937 Mar 1 15:15 log.txt +drwxr-xr-x 1 auser agroup 0 Mar 1 15:15 perfmon +-rw-r--r-- 1 auser agroup 26175 Mar 1 15:15 pmc_perf.csv +-rw-r--r-- 1 auser agroup 1708 Mar 1 15:17 roofline.csv +-rw-r--r-- 1 auser agroup 519 Mar 1 15:15 SQ_IFETCH_LEVEL.csv +-rw-r--r-- 1 auser agroup 456 Mar 1 15:15 SQ_INST_LEVEL_LDS.csv +-rw-r--r-- 1 auser agroup 474 Mar 1 15:15 SQ_INST_LEVEL_SMEM.csv +-rw-r--r-- 1 auser agroup 474 Mar 1 15:15 SQ_INST_LEVEL_VMEM.csv +-rw-r--r-- 1 auser agroup 599 Mar 1 15:15 SQ_LEVEL_WAVES.csv +-rw-r--r-- 1 auser agroup 650 Mar 1 15:15 sysinfo.csv +-rw-r--r-- 1 auser agroup 399 Mar 1 15:15 timestamps.csv +``` + +### Filtering +To reduce profiling time and the counters collected one may use profiling filters. Profiling filters and their functionality depend on the underlying profiler being used. 
While Omniperf is profiler agnostic, we have provided a detailed description of profiling filters available when using Omniperf with [rocProf](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html) below. + + + +Filtering Options: + +- The `-k` / `--kernel` \ flag allows for kernel filtering. Usage is equivalent to the current rocProf utility ([see details below](#kernel-filtering)). + +- The `-d` / `--dispatch` \ flag allows for dispatch ID filtering. Usage is equivalent to the current rocProf utility ([see details below](#dispatch-filtering)). + +- The `-b` / `--block` \ flag allows system profiling on one or more selected hardware components to speed up the profiling process ([see details below](#hardware-component-filtering)). + +```{tip} +Be cautious while combining different profiling filters in the same call. Conflicting filters may result in an error. + +For example, filtering on dispatch X when dispatch X does not match your kernel name filter. +``` + +#### Hardware Component Filtering +One can profile specific hardware components to speed up the profiling process. In Omniperf, we use the term hardware block to refer to a hardware component or a group of hardware components. All profiling results are accumulated in the same target directory, without overwriting those for other hardware components, hence enabling incremental profiling and analysis.
+ +The following example only gathers hardware counters for the Shader Sequencer (SQ) and L2 Cache (TCC) components, skipping all other hardware components: +```shell-session +$ omniperf profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256 + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +fname: pmc_cpc_perf: Skipped +fname: pmc_spi_perf: Skipped +fname: pmc_cpf_perf: Skipped +fname: pmc_tcp_perf: Skipped +fname: pmc_sq_perf4: Added +fname: pmc_tcc_perf: Added +fname: pmc_sq_perf8: Added +fname: pmc_ta_perf: Skipped +fname: pmc_sq_perf1: Added +fname: pmc_sq_perf3: Added +fname: pmc_td_perf: Skipped +fname: pmc_tcc2_perf: Skipped +fname: pmc_sqc_perf1: Skipped +fname: pmc_sq_perf6: Added +fname: pmc_sq_perf2: Added +Omniperf version: 2.0.0 +Profiler choice: rocprofv1 +Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +Target: MI200 +Command: ./vcopy -n 1048576 -b 256 +Kernel Selection: None +Dispatch Selection: None +Hardware Blocks: ['sq', 'tcc'] + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting Performance Counters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +... +``` + +#### Kernel Filtering +Kernel filtering is based on the name of the kernel(s) you would like to isolate. Use a kernel name substring list to isolate desired kernels. 
+ +The following example demonstrates profiling isolating the kernel matching substring "vecCopy": +```shell-session +$ omniperf profile --name vcopy -k vecCopy -- ./vcopy -n 1048576 -b 256 + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +Omniperf version: 2.0.0 +Profiler choice: rocprofv1 +Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +Target: MI200 +Command: ./vcopy -n 1048576 -b 256 +Kernel Selection: ['vecCopy'] +Dispatch Selection: None +Hardware Blocks: All + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting Performance Counters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +... +``` + +#### Dispatch Filtering +Dispatch filtering is based on the *global* dispatch index of kernels in a run. + +The following example profiles only the first kernel dispatch in execution of the application (please note zero-based indexing): +```shell-session +$ omniperf profile --name vcopy -d 0 -- ./vcopy -n 1048576 -b 256 + + ___ _ __ + / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| +| | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ +| |_| | | | | | | | | | | |_) | __/ | | _| + \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| + |_| + +Omniperf version: 2.0.0 +Profiler choice: rocprofv1 +Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +Target: MI200 +Command: ./vcopy -n 1048576 -b 256 +Kernel Selection: None +Dispatch Selection: ['0'] +Hardware Blocks: All + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting Performance Counters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +... +``` + + +### Standalone Roofline +If you are only interested in generating roofline analysis data try using `--roof-only`. This will only collect counters relevant to roofline, as well as generate a standalone .pdf output of your roofline plot. 
+ +Standalone Roofline Options: + +- The `--sort` \ allows you to specify whether you would like to overlay top kernel or top dispatch data in your roofline plot. + +- The `-m`/`--mem-level` \ allows you to specify specific level(s) of cache you would like to include in your roofline plot. + +- The `--device` \ allows you to specify a device id to collect performance data from when running our roofline benchmark on your system. + +- If you would like to distinguish different kernels in your .pdf roofline plot use `--kernel-names`. This will give each kernel a unique marker identifiable from the plot's key. + + +#### Roofline Only +The following example demonstrates profiling roofline data only: +```shell-session +$ omniperf profile --name vcopy --roof-only -- ./vcopy -n 1048576 -b 256 + +... +[roofline] Checking for roofline.csv in /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +[roofline] No roofline data found. Generating... +Checking for roofline.csv in /home/auser/repos/omniperf/sample/workloads/vcopy/MI200 +Empirical Roofline Calculation +Copyright © 2022 Advanced Micro Devices, Inc. All rights reserved. +Total detected GPU devices: 4 +GPU Device 0: Profiling... + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + ... +Empirical Roofline PDFs saved! +``` +An inspection of our workload output folder shows .pdf plots were generated successfully +```shell-session +$ ls workloads/vcopy/MI200/ +total 48 +-rw-r--r-- 1 auser agroup 13331 Mar 1 16:05 empirRoof_gpu-0_fp32_fp64.pdf +-rw-r--r-- 1 auser agroup 13136 Mar 1 16:05 empirRoof_gpu-0_int8_fp16.pdf +drwxr-xr-x 1 auser agroup 0 Mar 1 16:03 perfmon +-rw-r--r-- 1 auser agroup 1101 Mar 1 16:03 pmc_perf.csv +-rw-r--r-- 1 auser agroup 1715 Mar 1 16:05 roofline.csv +-rw-r--r-- 1 auser agroup 650 Mar 1 16:03 sysinfo.csv +-rw-r--r-- 1 auser agroup 399 Mar 1 16:03 timestamps.csv +``` +```{note} +Omniperf generates two roofline outputs to organize results and reduce clutter. 
One chart plots FP32/FP64 performance while the other plots I8/FP16 performance. +``` + +A sample *empirRoof_gpu-ALL_fp32_fp64.pdf* looks something like this: + +![Sample Standalone Roof Plot](images/sample-roof-plot.png) diff --git a/projects/rocprofiler-compute/docs/archive/requirements-doc.txt b/projects/rocprofiler-compute/docs/archive/requirements-doc.txt new file mode 100644 index 0000000000..0f063e835f --- /dev/null +++ b/projects/rocprofiler-compute/docs/archive/requirements-doc.txt @@ -0,0 +1,7 @@ +sphinx +myst-parser +recommonmark +pygments +sphinxmark +sphinx-rtd-theme +Pillow>=10.3.0 diff --git a/projects/rocprofiler-compute/docs/conceptual/command-processor.rst b/projects/rocprofiler-compute/docs/conceptual/command-processor.rst new file mode 100644 index 0000000000..873c8a3a68 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/command-processor.rst @@ -0,0 +1,153 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: Command processor (CP) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, command, processor, fetcher, packet processor, CPF, CPC + +********************** +Command processor (CP) +********************** + +The command processor (CP) is responsible for interacting with the AMDGPU kernel +driver -- the Linux kernel -- on the CPU and for interacting with user-space +HSA clients when they submit commands to HSA queues. Basic tasks of the CP +include reading commands (such as, corresponding to a kernel launch) out of +:hsa-runtime-pdf:`HSA queues <68>`, scheduling work to subsequent parts of the +scheduler pipeline, and marking kernels complete for synchronization events on +the host. + +The command processor consists of two sub-components: + +* :ref:`Fetcher ` (CPF): Fetches commands out of memory to hand + them over to the CPC for processing. 
+ +* :ref:`Packet processor ` (CPC): Micro-controller running the + command processing firmware that decodes the fetched commands and (for + kernels) passes them to the :ref:`workgroup processors ` for + scheduling. + +Before scheduling work to the accelerator, the command processor can +first acquire a memory fence to ensure system consistency +(:hsa-runtime-pdf:`Section 2.6.4 <91>`). After the work is complete, the +command processor can apply a memory-release fence. Depending on the AMD CDNA™ +accelerator in question, either of these operations *might* initiate a cache +write-back or invalidation. + +Analyzing command processor performance is most interesting for kernels +that you suspect to be limited by scheduling or launch rate. The command +processor’s metrics therefore are focused on reporting, for example: + +* Utilization of the fetcher + +* Utilization of the packet processor, including packet decoding + +* Stalls in fetching and processing + +.. _cpf-metrics: + +Command processor fetcher (CPF) +=============================== + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - CPF Utilization + + - Percent of total cycles where the CPF was busy actively doing any work. + The ratio of CPF busy cycles over total cycles counted by the CPF. + + - Percent + + * - CPF Stall + + - Percent of CPF busy cycles where the CPF was stalled for any reason. + + - Percent + + * - CPF-L2 Utilization + + - Percent of total cycles counted by the CPF-:doc:`L2 ` interface + where the CPF-L2 interface was active doing any work. The ratio of CPF-L2 + busy cycles over total cycles counted by the CPF-L2. + + - Percent + + * - CPF-L2 Stall + + - Percent of CPF-:doc:`L2 ` busy cycles where the CPF-L2 + interface was stalled for any reason. + + - Percent + + * - CPF-UTCL1 Stall + + - Percent of CPF busy cycles where the CPF was stalled by address + translation. + + - Percent + +..
_cpc-metrics: + +Command processor packet processor (CPC) +======================================== + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - CPC Utilization + + - Percent of total cycles where the CPC was busy actively doing any work. + The ratio of CPC busy cycles over total cycles counted by the CPC. + + - Percent + + * - CPC Stall + + - Percent of CPC busy cycles where the CPC was stalled for any reason. + + - Percent + + * - CPC Packet Decoding Utilization + + - Percent of CPC busy cycles spent decoding commands for processing. + + - Percent + + * - CPC-Workgroup Manager Utilization + + - Percent of CPC busy cycles spent dispatching workgroups to the + :ref:`workgroup manager `. + + - Percent + + * - CPC-L2 Utilization + + - Percent of total cycles counted by the CPC-:doc:`L2 ` interface + where the CPC-L2 interface was active doing any work. + + - Percent + + * - CPC-UTCL1 Stall + + - Percent of CPC busy cycles where the CPC was stalled by address + translation. + + - Percent + + * - CPC-UTCL2 Utilization + + - Percent of total cycles counted by the CPC's :doc:`L2 ` address + translation interface where the CPC was busy doing address translation + work. + + - Percent diff --git a/projects/rocprofiler-compute/docs/conceptual/compute-unit.rst b/projects/rocprofiler-compute/docs/conceptual/compute-unit.rst new file mode 100644 index 0000000000..d7d701b4b3 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/compute-unit.rst @@ -0,0 +1,59 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: Compute unit (CU) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, GCN, compute, unit, pipeline, workgroup, wavefront, + CDNA + +***************** +Compute unit (CU) +***************** + +The compute unit (CU) is responsible for executing a user's kernels on +CDNA™-based accelerators. All :ref:`wavefronts ` of a +:ref:`workgroup ` are scheduled on the same CU. 
+ +.. image:: ../data/performance-model/gcn_compute_unit.png + :align: center + :alt: AMD CDNA accelerator compute unit diagram + :width: 800 + +The CU consists of several independent execution pipelines and functional units. +The :doc:`/conceptual/pipeline-descriptions` section details the various +execution pipelines -- VALU, SALU, LDS, scheduler, and so forth. The metrics +presented by ROCm Compute Profiler for these pipelines are described in +:doc:`pipeline-metrics`. The :doc:`vL1D ` cache and +:doc:`LDS ` are described in their own sections. + +* The :ref:`desc-valu` is composed of multiple SIMD (single + instruction, multiple data) vector processors, vector general purpose + registers (VGPRs) and instruction buffers. The VALU is responsible for + executing much of the computational work on CDNA accelerators, including but + not limited to floating-point operations (FLOPs) and integer operations + (IOPs). + +* The vector memory (VMEM) unit is responsible for issuing loads, stores and + atomic operations that interact with the memory system. + +* The :ref:`desc-salu` is shared by all threads in a + :ref:`wavefront `, and is responsible for executing + instructions that are known to be uniform across the wavefront at compile + time. The SALU has a memory unit (SMEM) for interacting with memory, but it + cannot issue separately from the SALU. + +* The :doc:`local-data-share` is an on-CU software-managed scratchpad memory + that can be used to efficiently share data between all threads in a + :ref:`workgroup `. + +* The :ref:`desc-scheduler` is responsible for issuing and decoding instructions + for all the :ref:`wavefronts ` on the compute unit. + +* The :doc:`vector L1 data cache (vL1D) ` is the first level + cache local to the compute unit. On current CDNA accelerators, the vL1D is + write-through. The vL1D caches from multiple compute units are kept coherent + with one another through software instructions. 
+ +* CDNA accelerators -- that is, AMD Instinct™ MI100 and newer -- contain + specialized matrix-multiplication accelerator pipelines known as the + :ref:`desc-mfma`. + +For a more in-depth description of a compute unit on a CDNA accelerator, see +:hip-training-pdf:`22` and :gcn-crash-course:`27`. diff --git a/projects/rocprofiler-compute/docs/conceptual/definitions.rst b/projects/rocprofiler-compute/docs/conceptual/definitions.rst new file mode 100644 index 0000000000..0f6692f741 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/definitions.rst @@ -0,0 +1,152 @@ +.. meta:: + :description: ROCm Compute Profiler terminology and definitions + :keywords: Omniperf, ROCm Compute Profiler, ROCm, glossary, definitions, terms, profiler, tool, + Instinct, accelerator, AMD + +*********** +Definitions +*********** + +The following table briefly defines some terminology used in ROCm Compute Profiler interfaces +and in this documentation. + +.. include:: ./includes/terms.rst + +.. include:: ./includes/normalization-units.rst + +.. _memory-spaces: + +Memory spaces +============= + +AMD Instinct™ MI-series accelerators can access memory through multiple address spaces +which may map to different physical memory locations on the system. The +following table provides a view into how various types of memory used +in HIP map onto these constructs: + +.. list-table:: + :header-rows: 1 + + * - LLVM Address Space + - Hardware Memory Space + - HIP Terminology + + * - Generic + - Flat + - N/A + + * - Global + - Global + - Global + + * - Local + - LDS + - LDS/Shared + + * - Private + - Scratch + - Private + + * - Constant + - Same as global + - Constant + +The following is a high-level description of the address spaces in the AMDGPU +backend of LLVM: + +.. 
list-table:: + :header-rows: 1 + + * - Address space + - Description + + * - Global + - Memory that can be seen by all threads in a process, and may be backed by + the local accelerator's HBM, a remote accelerator's HBM, or the CPU's + DRAM. + + * - Local + - Memory that is only visible to a particular workgroup. On AMD's Instinct + accelerator hardware, this is stored in :doc:`LDS ` + memory. + + * - Private + - Memory that is only visible to a particular :ref:`work-item <desc-work-item>` + (thread), stored in the scratch space on AMD's Instinct accelerators. + + * - Constant + - Read-only memory that is in the global address space and stored on the + local accelerator's HBM. + + * - Generic + - Used when the compiler cannot statically prove that a pointer is + addressing memory in a single (non-generic) address space. Mapped to Flat + on AMD's Instinct accelerators; the pointer could dynamically address + global, local, private or constant memory. + +`LLVM's documentation for AMDGPU Backend `_ +has the most up-to-date information. Refer to this source for a more complete +explanation. + +.. _memory-type: + +Memory type +=========== + +AMD Instinct accelerators contain a number of different memory allocation +types to enable the HIP language's +:doc:`memory coherency model `. +These memory types are broadly similar between AMD Instinct accelerator +generations, but may differ in exact implementation. + +In addition, these memory types *might* differ between accelerators on the same +system, even when accessing the same memory allocation. + +For example, an :ref:`MI2XX ` accelerator accessing *fine-grained* +memory allocated local to that device may see the allocation as coherently +cacheable, while a remote accelerator might see the same allocation as +*uncached*. + +These memory types include: + +.. list-table:: + :header-rows: 1 + + * - Memory type + - Description + + * - Uncached Memory (UC) + - Memory that will not be cached in this accelerator.
On + :ref:`MI2XX ` accelerators, this corresponds to “fine-grained” + (or, “coherent”) memory allocated on a remote accelerator or the host, + for example, using ``hipHostMalloc`` or ``hipMallocManaged`` with default + allocation flags. + + * - Non-hardware-Coherent Memory (NC) + - Memory that will be cached by the accelerator, and is only guaranteed to + be consistent at kernel boundaries / after software-driven + synchronization events. On :ref:`MI2XX ` accelerators, this + type of memory maps to, for example, “coarse-grained” ``hipHostMalloc``’d + memory -- that is, allocated with the ``hipHostMallocNonCoherent`` + flag -- or ``hipMalloc``’d memory allocated on a remote accelerator. + + * - Coherently Cacheable (CC) + - Memory for which only reads from the accelerator where the memory was + allocated will be cached. Writes to CC memory are uncached, and trigger + invalidations of any line within this accelerator. On + :ref:`MI2XX ` accelerators, this type of memory maps to + “fine-grained” memory allocated on the local accelerator using, for + example, the ``hipExtMallocWithFlags`` API with the + ``hipDeviceMallocFinegrained`` flag. + + * - Read/Write Coherent Memory (RW) + - Memory that will be cached by the accelerator, but may be invalidated by + writes from remote devices at kernel boundaries / after software-driven + synchronization events. On :ref:`MI2XX ` accelerators, this + corresponds to “coarse-grained” memory allocated locally to the + accelerator using, for example, the default ``hipMalloc`` allocator. + +Find a good discussion of coarse and fine-grained memory allocations and what +type of memory is returned by various combinations of memory allocators, flags +and arguments in the +`Crusher quick-start guide `_.
diff --git a/projects/rocprofiler-compute/docs/conceptual/includes/normalization-units.rst b/projects/rocprofiler-compute/docs/conceptual/includes/normalization-units.rst new file mode 100644 index 0000000000..c6e6474eaf --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/includes/normalization-units.rst @@ -0,0 +1,46 @@ +.. _normalization-units: + +Normalization units +=================== + +A user-configurable unit by which you can choose to normalize data. Options +include: + +.. list-table:: + :header-rows: 1 + + * - Name + - Description + + * - ``per_wave`` + - The total value of the measured counter or metric that occurred per + kernel invocation divided by the total number of + :ref:`wavefronts ` launched in the kernel. + + * - ``per_cycle`` + - The total value of the measured counter or metric that occurred per + kernel invocation divided by the + :ref:`kernel cycles `, that is, the total number of + cycles the kernel executed as measured by the + :doc:`command processor `. + + * - ``per_kernel`` + - The total value of the measured counter or metric that occurred per + kernel invocation. + + * - ``per_second`` + - The total value of the measured counter or metric that occurred per + kernel invocation divided by the :ref:`kernel time `, + that is, the total runtime of the kernel in seconds, as measured by the + :doc:`command processor `. + +By default, ROCm Compute Profiler uses the ``per_wave`` normalization. + +.. tip:: + + The best normalization may vary depending on your use case. For instance, a + ``per_second`` normalization might be useful for FLOP or bandwidth + comparisons, while a ``per_wave`` normalization could be useful to see how many + (and what types) of instructions are used per wavefront. A ``per_kernel`` + normalization can be useful to get the total aggregate values of metrics for + comparison between different configurations. 
diff --git a/projects/rocprofiler-compute/docs/conceptual/includes/terms.rst b/projects/rocprofiler-compute/docs/conceptual/includes/terms.rst new file mode 100644 index 0000000000..e37f7b9712 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/includes/terms.rst @@ -0,0 +1,187 @@ +.. _desc-workgroup: + +.. _desc-work-item: + +.. _desc-wavefront: + +.. _desc-divergence: + +.. _kernel-time: + +.. _kernel-cycles: + +.. _total-active-cu-cycles: + +.. _total-cu-cycles: + +.. _total-se-cycles: + +.. _total-simd-cycles: + +.. _total-pipe-cycles: + +.. _total-l1i-cycles: + +.. _total-active-l2-cycles: + +.. _total-l2-cycles: + +.. _total-sl1d-cycles: + +.. _thread-requests: + +.. list-table:: + :header-rows: 1 + + * - Name + + - Description + + - Unit + + * - Kernel time + + - The number of seconds the accelerator was executing a kernel, from the + :doc:`command processor `'s (CP) start-of-kernel + timestamp (a number of cycles after the CP begins processing the packet) + to the CP's end-of-kernel timestamp (a number of cycles before the CP + stops processing the packet). + + - Seconds + + * - Kernel cycles + + - The number of cycles the accelerator was active doing *any* work, as + measured by the :doc:`command processor ` (CP). + + - Cycles + + * - Total CU cycles + + - The number of cycles the accelerator was active doing *any* work + (that is, kernel cycles), multiplied by the number of + :doc:`compute units ` on the accelerator. A + measure of the total possible active cycles the compute units could be + doing work, useful for the normalization of metrics inside the CU. + + - Cycles + + * - Total active CU cycles + + - The number of cycles a CU on the accelerator was active doing *any* + work, summed over all :doc:`compute units ` on the + accelerator. + + - Cycles + + * - Total SIMD cycles + + - The number of cycles the accelerator was active doing *any* work (that + is, kernel cycles), multiplied by the number of + :doc:`SIMDs ` on the accelerator. 
A measure of the + total possible active cycles the SIMDs could be doing work, useful for + the normalization of metrics inside the CU. + + - Cycles + + * - Total L2 cycles + + - The number of cycles the accelerator was active doing *any* work (that + is, kernel cycles), multiplied by the number of :doc:`L2 ` + channels on the accelerator. A measure of the total possible active + cycles the L2 channels could be doing work, useful for the normalization + of metrics inside the L2. + + - Cycles + + * - Total active L2 cycles + + - The number of cycles a channel of the L2 cache was active doing *any* + work, summed over all :doc:`L2 ` channels on the accelerator. + + - Cycles + + * - Total sL1D cycles + + - The number of cycles the accelerator was active doing *any* work (that + is, kernel cycles), multiplied by the number of + :ref:`scalar L1 data caches ` on the accelerator. A measure of + the total possible active cycles the sL1Ds could be doing work, useful + for the normalization of metrics inside the sL1D. + + - Cycles + + * - Total L1I cycles + + - The number of cycles the accelerator was active doing *any* work (that + is, kernel cycles), multiplied by the number of + :ref:`L1 instruction caches ` (L1I) on the accelerator. A + measure of the total possible active cycles the L1Is could be doing + work, useful for the normalization of metrics inside the L1I. + + - Cycles + + * - Total scheduler-pipe cycles + + - The number of cycles the accelerator was active doing *any* work (that + is, kernel cycles), multiplied by the number of + :doc:`scheduler pipes ` on the accelerator. A measure + of the total possible active cycles the scheduler-pipes could be doing + work, useful for the normalization of metrics inside the + :ref:`workgroup manager ` and + :doc:`command processor `. 
+ + - Cycles + + * - Total shader-engine cycles + + - The total number of cycles the accelerator was active doing *any* work, + multiplied by the number of :doc:`shader engines ` on the + accelerator. A measure of the total possible active cycles the shader + engines could be doing work, useful for the normalization of + metrics inside the :ref:`workgroup manager `. + + - Cycles + + * - Thread-requests + + - The number of unique memory addresses accessed by a single memory + instruction. On AMD Instinct accelerators, this has a maximum of 64 + (that is, the size of the :ref:`wavefront `). + + - Addresses + + * - Work-item + + - A single *thread*, or lane, of execution that executes in lockstep with + the rest of the work-items comprising a :ref:`wavefront ` + of execution. + + - N/A + + * - Wavefront + + - A group of work-items, or threads, that execute in lockstep on the + :doc:`compute unit `. On AMD Instinct accelerators, the + wavefront size is always 64 work-items. + + - N/A + + * - Workgroup + + - A group of wavefronts that execute on the same + :doc:`compute unit `, and can cooperatively execute and + share data via the use of synchronization primitives, + :doc:`LDS `, atomics, and others. + + - N/A + + * - Divergence + + - Divergence within a wavefront occurs when not all work-items are active + when executing an instruction, that is, due to non-uniform control flow + within a wavefront. Can reduce execution efficiency by causing, + for instance, the :ref:`VALU ` to need to execute both + branches of a conditional with different sets of work-items active. + + - N/A diff --git a/projects/rocprofiler-compute/docs/conceptual/l2-cache.rst b/projects/rocprofiler-compute/docs/conceptual/l2-cache.rst new file mode 100644 index 0000000000..b9752f1baa --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/l2-cache.rst @@ -0,0 +1,754 @@ +.. 
meta:: + :description: ROCm Compute Profiler performance model: L2 cache (TCC) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, L2, cache, infinity fabric, metrics + +************** +L2 cache (TCC) +************** + +The L2 cache is the coherence point for current AMD Instinct™ MI-series GCN™ +GPUs and CDNA™ accelerators, and is shared by all :doc:`CUs ` +on the device. Besides serving requests from the +:doc:`vector L1 data caches `, the L2 cache also is responsible +for servicing requests from the :ref:`L1 instruction caches `, the +:ref:`scalar L1 data caches ` and the +:doc:`command processor `. The L2 cache is composed of a +number of distinct channels (32 on MI100 and :ref:`MI2XX ` series CDNA +accelerators at 256B address interleaving) which can largely operate +independently. Mapping of incoming requests to a specific L2 channel is +determined by a hashing mechanism that attempts to evenly distribute requests +across the L2 channels. Requests that miss in the L2 cache are passed out to +:ref:`Infinity Fabric™ ` to be routed to the appropriate memory +location. + +The L2 cache metrics reported by ROCm Compute Profiler are broken down into four +categories: + +* :ref:`L2 Speed-of-Light ` + +* :ref:`L2 cache accesses ` + +* :ref:`L2-Fabric transactions ` + +* :ref:`L2-Fabric stalls ` + +.. _l2-sol: + +L2 Speed-of-Light +================= + +.. warning:: + + The theoretical maximum throughput for some metrics in this section + are currently computed with the maximum achievable clock frequency, as + reported by ``rocminfo``, for an accelerator. This may not be realistic for + all workloads. + +The L2 cache’s speed-of-light table contains a few key metrics about the +performance of the L2 cache, aggregated over all the L2 channels, as a +comparison with the peak achievable values of those metrics: + +.. 
list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Utilization + + - The ratio of the + :ref:`number of cycles an L2 channel was active, summed over all L2 channels on the accelerator ` + over the :ref:`total L2 cycles `. + + - Percent + + * - Bandwidth + + - The number of bytes looked up in the L2 cache, as a percent of the peak + theoretical bandwidth achievable on the specific accelerator. The number + of bytes is calculated as the number of cache lines requested multiplied + by the cache line size. This value does not consider partial requests, so + e.g., if only a single value is requested in a cache line, the data + movement will still be counted as a full cache line. + + - Percent + + * - Hit Rate + + - The ratio of the number of L2 cache line requests that hit in the L2 + cache over the total number of incoming cache line requests to the L2 + cache. + + - Percent + + * - L2-Fabric Read BW + + - The number of bytes read by the L2 over the + :ref:`Infinity Fabric interface ` per unit time. + + - GB/s + + * - L2-Fabric Write and Atomic BW + + - The number of bytes sent by the L2 over the + :ref:`Infinity Fabric interface ` by write and atomic + operations per unit time. + + - GB/s + +.. note:: + + The L2 cache on AMD Instinct MI CDNA accelerators uses a "hit-on-miss" + approach to reporting cache hits. That is, if while satisfying a miss, + another request comes in that would hit on the same pending cache line, the + subsequent request will be counted as a 'hit'. Therefore, it is also + important to consider the latency metric in the :ref:`L2-Fabric ` + section when evaluating the L2 hit rate. + +.. _l2-cache-accesses: + +L2 cache accesses +================= + +This section details the incoming requests to the L2 cache from the +:doc:`vL1D ` and other clients -- for instance, the +:ref:`sL1D ` and :ref:`L1I ` caches. + +.. 
list-table:: + :header-rows: 1 + :widths: 13 70 17 + + * - Metric + + - Description + + - Unit + + * - Bandwidth + + - The number of bytes looked up in the L2 cache, per + :ref:`normalization unit `. The number of bytes is + calculated as the number of cache lines requested multiplied by the cache + line size. This value does not consider partial requests, so for example, + if only a single value is requested in a cache line, the data movement + will still be counted as a full cache line. + + - Bytes per :ref:`normalization unit `. + + * - Requests + + - The total number of incoming requests to the L2 from all clients for all + request types, per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit `. + + * - Read Requests + + - The total number of read requests to the L2 from all clients. + + - Requests per :ref:`normalization unit ` + + * - Write Requests + + - The total number of write requests to the L2 from all clients. + + - Requests per :ref:`normalization unit ` + + * - Atomic Requests + + - The total number of atomic requests (with and without return) to the L2 + from all clients. + + - Requests per :ref:`normalization unit ` + + * - Streaming Requests + + - The total number of incoming requests to the L2 that are marked as + *streaming*. The exact meaning of this may differ depending on the + targeted accelerator, however on an :ref:`MI2XX ` this + corresponds to + `non-temporal load or stores `_. + The L2 cache attempts to evict *streaming* requests before normal + requests when the L2 is at capacity. + + - Requests per :ref:`normalization unit ` + + * - Probe Requests + + - The number of coherence probe requests made to the L2 cache from outside + the accelerator. On an :ref:`MI2XX `, probe requests may be + generated by, for example, writes to + :ref:`fine-grained device ` memory or by writes to + :ref:`coarse-grained ` device memory. 
+ + - Requests per :ref:`normalization unit ` + + * - Hit Rate + + - The ratio of the number of L2 cache line requests that hit in the L2 + cache over the total number of incoming cache line requests to the L2 + cache. + + - Percent + + * - Hits + + - The total number of requests to the L2 from all clients that hit in the + cache. As noted in the :ref:`Speed-of-Light ` section, this + includes hit-on-miss requests. + + - Requests per :ref:`normalization unit ` + + * - Misses + + - The total number of requests to the L2 from all clients that miss in the + cache. As noted in the :ref:`Speed-of-Light ` section, these do + not include hit-on-miss requests. + + - Requests per :ref:`normalization unit ` + + * - Writebacks + + - The total number of L2 cache lines written back to memory for any reason. + Write-backs may occur due to user code (such as HIP kernel calls to + ``__threadfence_system`` or atomic built-ins) by the + :doc:`command processor `'s memory acquire/release + fences, or for other internal hardware reasons. + + - Cache lines per :ref:`normalization unit ` + + * - Writebacks (Internal) + + - The total number of L2 cache lines written back to memory for internal + hardware reasons, per :ref:`normalization unit `. + + - Cache lines per :ref:`normalization unit `. + + * - Writebacks (vL1D Req) + + - The total number of L2 cache lines written back to memory due to requests + initiated by the :doc:`vL1D cache `, per + :ref:`normalization unit `. + + - Cache lines per :ref:`normalization unit `. + + * - Evictions (Normal) + + - The total number of L2 cache lines evicted from the cache due to capacity + limits, per :ref:`normalization unit `. + + - Cache lines per :ref:`normalization unit `. + + * - Evictions (vL1D Req) + + - The total number of L2 cache lines evicted from the cache due to + invalidation requests initiated by the + :doc:`vL1D cache `, per + :ref:`normalization unit `. + + - Cache lines per :ref:`normalization unit `. 
+ + * - Non-hardware-Coherent Requests + + - The total number of requests to the L2 to Non-hardware-Coherent (NC) + memory allocations, per :ref:`normalization unit `. + See the :ref:`memory-type` for more information. + + - Requests per :ref:`normalization unit `. + + * - Uncached Requests + + - The total number of requests to the L2 that go to Uncached (UC) memory + allocations. See the :ref:`memory-type` for more information. + + - Requests per :ref:`normalization unit `. + + * - Coherently Cached Requests + + - The total number of requests to the L2 that go to Coherently Cacheable (CC) + memory allocations. See the :ref:`memory-type` for more information. + + - Requests per :ref:`normalization unit `. + + * - Read/Write Coherent Requests + + - The total number of requests to the L2 that go to Read-Write coherent memory + (RW) allocations. See the :ref:`memory-type` for more information. + + - Requests per :ref:`normalization unit `. + +.. note:: + + All requests to the L2 are for a single cache line's worth of data. The size + of a cache line may vary depending on the accelerator, however on an AMD + Instinct CDNA2 :ref:`MI2XX ` accelerator, it is 128B, while on + an MI100, it is 64B. + +.. _l2-fabric: + +L2-Fabric transactions +====================== + +Requests/data that miss in the L2 must be routed to memory in order to +service them. The backing memory for a request may be local to this +accelerator (i.e., in the local high-bandwidth memory), in a remote +accelerator’s memory, or even in the CPU’s memory. Infinity Fabric +is responsible for routing these memory requests/data to the correct +location and returning any fetched data to the L2 cache. The +:ref:`l2-request-flow` describes the flow of these requests through +Infinity Fabric in more detail, as described by ROCm Compute Profiler metrics, +while :ref:`l2-request-metrics` give detailed definitions of +individual metrics. + +.. 
_l2-request-flow: + +Request flow +------------ + +The following is a diagram that illustrates how L2↔Fabric requests are reported +by ROCm Compute Profiler: + +.. figure:: ../data/performance-model/l2perf_model.png + :align: center + :alt: L2-Fabric transaction flow on AMD Instinct MI-series accelerators + :width: 800 + + L2↔Fabric transaction flow on AMD Instinct MI-series accelerators. + + +Requests from the L2 Cache are broken down into two major categories, read +requests and write requests (at this granularity, atomic requests are treated +as writes). + +From there, these requests can additionally be subdivided in a number of ways. +First, these requests may be sent across Infinity Fabric as different +transaction sizes, 32B or 64B on current CDNA accelerators. + +.. note:: + + On current CDNA accelerators, the 32B read request path is expected to be + unused and so is disconnected in the flow diagram. + +In addition, the read and write requests can be further categorized as: + +* Uncached read/write requests, for instance: for access to + :ref:`fine-grained memory ` + +* Atomic requests, for instance: for atomic updates to + :ref:`fine-grained memory ` + +* HBM read/write requests OR remote read/write requests, for instance: for + requests to the accelerator’s local HBM OR requests to a remote accelerator’s + HBM or the CPU’s DRAM + +These classifications are not necessarily *exclusive*. For example, a +write request can be classified as an atomic request to the +accelerator’s local HBM, and an uncached write request. The request-flow +diagram marks *exclusive* classifications as a splitting of the flow, +while *non-exclusive* requests do not split the flow line. For example, +a request is either a 32B Write Request OR a 64B Write request, as the +flow splits at this point. + +However, continuing along, the same request might be an atomic request and an +uncached write request, as reflected by a non-split flow. 
+ +Finally, we note that :ref:`uncached ` read requests (e.g., to +:ref:`fine-grained memory `) are handled specially on CDNA +accelerators, as indicated in the request flow diagram. These are +expected to be counted as a 64B Read Request, and *if* they are requests +to uncached memory (denoted by the dashed line), they will also be +counted as *two* uncached read requests (that is, the request is split). + + +.. _l2-request-metrics: + +Metrics +------- + + The following metrics are reported for the L2-Fabric interface: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - L2-Fabric Read Bandwidth + + - The total number of bytes read by the L2 cache from Infinity Fabric per + :ref:`normalization unit `. + + - Bytes per :ref:`normalization unit `. + + * - HBM Read Traffic + + - The percent of read requests generated by the L2 cache that are routed to + the accelerator's local high-bandwidth memory (HBM). This breakdown does + not consider the *size* of the request (meaning that 32B and 64B requests + are both counted as a single request), so this metric only *approximates* + the percent of the L2-Fabric Read bandwidth directed to the local HBM. + + - Percent + + * - Remote Read Traffic + + - The percent of read requests generated by the L2 cache that are routed to + any memory location other than the accelerator's local high-bandwidth + memory (HBM) -- for example, the CPU's DRAM or a remote accelerator's + HBM. This breakdown does not consider the *size* of the request (meaning + that 32B and 64B requests are both counted as a single request), so this + metric only *approximates* the percent of the L2-Fabric Read bandwidth + directed to a remote location. + + - Percent + + * - Uncached Read Traffic + + - The percent of read requests generated by the L2 cache that are reading + from an :ref:`uncached memory allocation `. 
Note, as + described in the :ref:`request flow ` section, a single + 64B read request is typically counted as two uncached read requests. So, + it is possible for the Uncached Read Traffic to reach up to 200% of the + total number of read requests. This breakdown does not consider the + *size* of the request (i.e., 32B and 64B requests are both counted as a + single request), so this metric only *approximates* the percent of the + L2-Fabric read bandwidth directed to an uncached memory location. + + - Percent + + * - L2-Fabric Write and Atomic Bandwidth + + - The total number of bytes written by the L2 over Infinity Fabric by write + and atomic operations per + :ref:`normalization unit `. Note that on current + CDNA accelerators, such as the :ref:`MI2XX `, requests are + only considered *atomic* by Infinity Fabric if they are targeted at + non-write-cacheable memory, for example, + :ref:`fine-grained memory ` allocations or + :ref:`uncached memory ` allocations on the + MI2XX. + + - Bytes per :ref:`normalization unit `. + + * - HBM Write and Atomic Traffic + + - The percent of write and atomic requests generated by the L2 cache that + are routed to the accelerator's local high-bandwidth memory (HBM). This + breakdown does not consider the *size* of the request (meaning that 32B + and 64B requests are both counted as a single request), so this metric + only *approximates* the percent of the L2-Fabric Write and Atomic + bandwidth directed to the local HBM. Note that on current CDNA + accelerators, such as the :ref:`MI2XX `, requests are only + considered *atomic* by Infinity Fabric if they are targeted at + :ref:`fine-grained memory ` allocations or + :ref:`uncached memory ` allocations. + + - Percent + + * - Remote Write and Atomic Traffic + + - The percent of write and atomic requests generated by the L2 cache that are routed to + any memory location other than the accelerator's local high-bandwidth + memory (HBM) -- for example, the CPU's DRAM or a remote accelerator's + HBM. 
This breakdown does not consider the *size* of the request (meaning + that 32B and 64B requests are both counted as a single request), so this + metric only *approximates* the percent of the L2-Fabric Write and Atomic bandwidth + directed to a remote location. Note that on current CDNA + accelerators, such as the :ref:`MI2XX `, requests are only + considered *atomic* by Infinity Fabric if they are targeted at + :ref:`fine-grained memory ` allocations or + :ref:`uncached memory ` allocations. + + - Percent + + * - Atomic Traffic + + - The percent of write requests generated by the L2 cache that are atomic + requests to *any* memory location. This breakdown does not consider the + *size* of the request (meaning that 32B and 64B requests are both counted + as a single request), so this metric only *approximates* the percent of + the L2-Fabric Write and Atomic bandwidth consumed by atomic requests. Note that on + current CDNA accelerators, such as the :ref:`MI2XX `, + requests are only considered *atomic* by Infinity Fabric if they are + targeted at :ref:`fine-grained memory ` allocations or + :ref:`uncached memory ` allocations. + + - Percent + + * - Uncached Write and Atomic Traffic + + - The percent of write and atomic requests generated by the L2 cache that + are targeting :ref:`uncached memory allocations `. This + breakdown does not consider the *size* of the request (meaning that 32B + and 64B requests are both counted as a single request), so this metric + only *approximates* the percent of the L2-Fabric Write and Atomic bandwidth directed + to uncached memory allocations. + + - Percent + + * - Read Latency + + - The time-averaged number of cycles read requests spent in Infinity Fabric + before data was returned to the L2. + + - Cycles + + * - Write Latency + + - The time-averaged number of cycles write requests spent in Infinity + Fabric before a completion acknowledgement was returned to the L2. 
+ + - Cycles + + * - Atomic Latency + + - The time-averaged number of cycles atomic requests spent in Infinity + Fabric before a completion acknowledgement (atomic without return value) + or data (atomic with return value) was returned to the L2. + + - Cycles + + * - Read Stall + + - The ratio of the total number of cycles the L2-Fabric interface was + stalled on a read request to any destination (local HBM, remote PCIe® + connected accelerator or CPU, or remote Infinity Fabric connected + accelerator [#inf]_ or CPU) over the + :ref:`total active L2 cycles `. + + - Percent + + * - Write Stall + + - The ratio of the total number of cycles the L2-Fabric interface was + stalled on a write or atomic request to any destination (local HBM, + remote accelerator or CPU, PCIe connected accelerator or CPU, or remote + Infinity Fabric connected accelerator [#inf]_ or CPU) over the + :ref:`total active L2 cycles `. + + - Percent + +.. _l2-detailed-metrics: + +Detailed transaction metrics +---------------------------- + +The following metrics are available in the detailed L2-Fabric +transaction breakdown table: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - 32B Read Requests + + - The total number of L2 requests to Infinity Fabric to read 32B of data + from any memory location, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. Typically unused on CDNA + accelerators. + + - Requests per :ref:`normalization unit `. + + * - Uncached Read Requests + + - The total number of L2 requests to Infinity Fabric to read + :ref:`uncached data ` from any memory location, per + :ref:`normalization unit `. 64B requests for + uncached data are counted as two 32B uncached data requests. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. 
+ + * - 64B Read Requests + + - The total number of L2 requests to Infinity Fabric to read 64B of data + from any memory location, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - HBM Read Requests + + - The total number of L2 requests to Infinity Fabric to read 32B or 64B of + data from the accelerator's local HBM, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - Remote Read Requests + + - The total number of L2 requests to Infinity Fabric to read 32B or 64B of + data from any source other than the accelerator's local HBM, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - 32B Write and Atomic Requests + + - The total number of L2 requests to Infinity Fabric to write or atomically + update 32B of data to any memory location, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - Uncached Write and Atomic Requests + + - The total number of L2 requests to Infinity Fabric to write or atomically + update 32B or 64B of :ref:`uncached data `, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - 64B Write and Atomic Requests + + - The total number of L2 requests to Infinity Fabric to write or atomically + update 64B of data in any memory location, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - HBM Write and Atomic Requests + + - The total number of L2 requests to Infinity Fabric to write or atomically + update 32B or 64B of data in the accelerator's local HBM, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. 
+ + - Requests per :ref:`normalization unit `. + + * - Remote Write and Atomic Requests + + - The total number of L2 requests to Infinity Fabric to write or atomically + update 32B or 64B of data in any memory location other than the + accelerator's local HBM, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. + + - Requests per :ref:`normalization unit `. + + * - Atomic Requests + + - The total number of L2 requests to Infinity Fabric to atomically update + 32B or 64B of data in any memory location, per + :ref:`normalization unit `. See + :ref:`l2-request-flow` for more detail. Note that on current CDNA + accelerators, such as the :ref:`MI2XX `, requests are only + considered *atomic* by Infinity Fabric if they are targeted at + non-write-cacheable memory, such as + :ref:`fine-grained memory ` allocations or + :ref:`uncached memory ` allocations on the MI2XX. + + - Requests per :ref:`normalization unit `. + +.. _l2-fabric-stalls: + +L2-Fabric interface stalls +========================== + +When the interface between the L2 cache and Infinity Fabric becomes backed up by +requests, it may stall, preventing the L2 from issuing additional requests to +Infinity Fabric until prior requests complete. This section gives a breakdown of +what types of requests in a kernel caused a stall (like read versus write), and +to which locations -- for instance, to the accelerator’s local memory, or to +remote accelerators or CPUs. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Read - PCIe Stall + + - The number of cycles the L2-Fabric interface was stalled on read requests + to remote PCIe connected accelerators [#inf]_ or CPUs as a percent of the + :ref:`total active L2 cycles `. 
+ + - Percent + + * - Read - Infinity Fabric Stall + + - The number of cycles the L2-Fabric interface was stalled on read requests + to remote Infinity Fabric connected accelerators [#inf]_ or CPUs as a + percent of the :ref:`total active L2 cycles `. + + - Percent + + * - Read - HBM Stall + + - The number of cycles the L2-Fabric interface was stalled on read requests + to the accelerator's local HBM as a percent of the + :ref:`total active L2 cycles `. + + - Percent + + * - Write - PCIe Stall + + - The number of cycles the L2-Fabric interface was stalled on write or + atomic requests to remote PCIe connected accelerators [#inf]_ or CPUs as + a percent of the :ref:`total active L2 cycles `. + + - Percent + + * - Write - Infinity Fabric Stall + + - The number of cycles the L2-Fabric interface was stalled on write or + atomic requests to remote Infinity Fabric connected accelerators [#inf]_ + or CPUs as a percent of the + :ref:`total active L2 cycles `. + + - Percent + + * - Write - HBM Stall + + - The number of cycles the L2-Fabric interface was stalled on write or + atomic requests to the accelerator's local HBM as a percent of the + :ref:`total active L2 cycles `. + + - Percent + + * - Write - Credit Starvation + + - The number of cycles the L2-Fabric interface was stalled on write or + atomic requests to any memory location because too many write/atomic + requests were currently in flight, as a percent of the + :ref:`total active L2 cycles `. + + - Percent + +.. warning:: + + On current CDNA accelerators and GCN GPUs, these L2↔Fabric stalls can be undercounted in some circumstances. + +.. rubric:: Footnotes + +.. [#inf] In addition to being used for on-accelerator data-traffic, AMD + `Infinity Fabric `_ + technology can be used to connect multiple accelerators to achieve advanced + peer-to-peer connectivity and enhanced bandwidths over traditional PCIe + connections. 
Some AMD Instinct MI-series accelerators like the MI250X feature coherent CPU-to-accelerator connections built using AMD Infinity Fabric. For more information, see the `AMD CDNA2 white paper `_. + +.. rubric:: Disclaimer + +PCIe® is a registered trademark of PCI-SIG Corporation. diff --git a/projects/rocprofiler-compute/docs/conceptual/local-data-share.rst b/projects/rocprofiler-compute/docs/conceptual/local-data-share.rst new file mode 100644 index 0000000000..121384de42 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/local-data-share.rst @@ -0,0 +1,182 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: Local data share (LDS) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, local, data, share, LDS + +********************** +Local data share (LDS) +********************** + +.. _lds-sol: + +LDS Speed-of-Light +================== + +.. warning:: + + The theoretical maximum throughput for some metrics in this section is + currently computed with the maximum achievable clock frequency, as reported + by ``rocminfo``, for an accelerator. This may not be realistic for all + workloads. + +The :ref:`LDS ` speed-of-light chart shows a number of key metrics for +the LDS as a comparison with the peak achievable values of those metrics. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Utilization + + - Indicates what percent of the kernel's duration the :ref:`LDS ` + was actively executing instructions (including, but not limited to, load, + store, atomic and HIP's ``__shfl`` operations). Calculated as the ratio + of the total number of cycles LDS was active over the + :ref:`total CU cycles `. + + - Percent + + * - Access Rate + + - Indicates the percentage of SIMDs in the :ref:`VALU ` [#lds-workload]_ + actively issuing LDS instructions, averaged over the lifetime of the + kernel. 
Calculated as the ratio of the total number of cycles spent by + the :ref:`scheduler ` issuing :ref:`LDS ` + instructions over the + :ref:`total CU cycles `. + + - Percent + + * - Theoretical Bandwidth (% of Peak) + + - Indicates the maximum amount of bytes that *could* have been loaded from, + stored to, or atomically updated in the LDS in this kernel, as a percent + of the peak LDS bandwidth achievable. See the + :ref:`LDS bandwidth example ` for more detail. + + - Percent + + * - Bank Conflict Rate + + - Indicates the percentage of active LDS cycles that were spent servicing + bank conflicts. Calculated as the ratio of LDS cycles spent servicing + bank conflicts over the number of LDS cycles that would have been + required to move the same amount of data in an uncontended access. [#lds-bank-conflict]_ + + - Percent + +.. rubric:: Footnotes + +.. [#lds-workload] Here we assume the typical case where the workload evenly distributes + LDS operations over all SIMDs in a CU (that is, waves on different SIMDs are + executing similar code). For highly unbalanced workloads, where e.g., one + SIMD pair in the CU does not issue LDS instructions at all, this metric is + better interpreted as the percentage of SIMDs issuing LDS instructions on + :ref:`SIMD pairs ` that are actively using the LDS, averaged over + the lifetime of the kernel. + +.. [#lds-bank-conflict] The maximum value of the bank conflict rate is less than 100% + (specifically: 96.875%), as the first cycle in the + :ref:`LDS scheduler ` is never considered contended. + +.. _lds-stats: + +Statistics +========== + +The LDS statistics panel gives a more detailed view of the hardware: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - LDS Instructions + + - The total number of LDS instructions (including, but not limited to, + read/write/atomics and HIP's ``__shfl`` instructions) executed per + :ref:`normalization unit `. 
+ + - Instructions per :ref:`normalization unit ` + + * - Theoretical Bandwidth + + - Indicates the maximum amount of bytes that could have been loaded from, + stored to, or atomically updated in the LDS per + :ref:`normalization unit `. Does *not* take into + account the execution mask of the wavefront when the instruction was + executed. See the + :ref:`LDS bandwidth example ` for more detail. + + - Bytes per :ref:`normalization unit ` + + * - LDS Latency + + - The average number of round-trip cycles (i.e., from issue to data-return + / acknowledgment) required for an LDS instruction to complete. + + - Cycles + + * - Bank Conflicts/Access + + - The ratio of the number of cycles spent in the + :ref:`LDS scheduler ` due to bank conflicts (as determined by + the conflict resolution hardware) to the base number of cycles that would + be spent in the LDS scheduler in a completely uncontended case. This is + the unnormalized form of the Bank Conflict Rate. + + - Conflicts/Access + + * - Index Accesses + + - The total number of cycles spent in the :ref:`LDS scheduler ` + over all operations per :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + + * - Atomic Return Cycles + + - The total number of cycles spent on LDS atomics with return per + :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + + * - Bank Conflicts + + - The total number of cycles spent in the :ref:`LDS scheduler ` + due to bank conflicts (as determined by the conflict resolution hardware) + per :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + + * - Address Conflicts + + - The total number of cycles spent in the :ref:`LDS scheduler ` + due to address conflicts (as determined by the conflict resolution + hardware) per :ref:`normalization unit `. 
+ + - Cycles per :ref:`normalization unit ` + + * - Unaligned Stall + + - The total number of cycles spent in the :ref:`LDS scheduler ` + due to stalls from non-dword aligned addresses per + :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + + * - Memory Violations + + - The total number of out-of-bounds accesses made to the LDS, per + :ref:`normalization unit `. This is unused and + expected to be zero in most configurations for modern CDNA™ accelerators. + + - Accesses per :ref:`normalization unit ` diff --git a/projects/rocprofiler-compute/docs/conceptual/performance-model.rst b/projects/rocprofiler-compute/docs/conceptual/performance-model.rst new file mode 100644 index 0000000000..3d3ab508db --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/performance-model.rst @@ -0,0 +1,181 @@ +.. meta:: + :description: ROCm Compute Profiler performance model + :keywords: Omniperf, ROCm Compute Profiler, ROCm, performance, model, profiler, tool, Instinct, + accelerator, AMD, CDNA + +***************** +Performance model +***************** + +ROCm Compute Profiler makes available an extensive list of metrics to better understand +achieved application performance on AMD Instinct™ MI-series accelerators +including Graphics Core Next™ (GCN) GPUs like the AMD Instinct MI50, CDNA™ +accelerators like the MI100, CDNA2 accelerators such as the AMD Instinct MI250X, MI250, +and MI210, CDNA3 accelerators such as the AMD Instinct MI300A, MI300X, MI325X, and CDNA4 accelerators such as MI350X and MI355X. + +The table provides key details and support available for the different architectures: + +✅: Supported +❌: Unsupported + +**Architecture details** + +.. 
table:: + :widths: 30 30 30 30 30 + + +-----------------+-----------+---------------------------------+-------------------------------------+-------------------------+ + |Architecture |CDNA |CDNA 2 |CDNA 3 |CDNA 4 | + +=================+===========+=================================+=====================================+=========================+ + |Chip packaging |Single Die |Two graphics Compute Dies (GCDs) |One logical processor with dozen |Similar to CDNA3, | + | | |into single package. |chiplets, configurable with partition|Multi-Die chiplet, but | + | | | |modes. |with two I/O Dies (IODs) | + +-----------------+-----------+---------------------------------+-------------------------------------+-------------------------+ + |Supported series |MI100 |MI200 |MI300A |MI350X | + | | +---------------------------------+-------------------------------------+-------------------------+ + | | |MI210 |MI300X |MI355X | + | | +---------------------------------+-------------------------------------+-------------------------+ + | | |MI250 |MI325X | | + +-----------------+-----------+---------------------------------+-------------------------------------+-------------------------+ + |Spatial partition|❌ |❌ |Compute partition mode and |Compute partition mode | + |mode | | |Memory partition mode |and Memory partition mode| + +-----------------+-----------+---------------------------------+-------------------------------------+-------------------------+ + +**Data type support** + +.. 
list-table:: + :header-rows: 1 + + * + - Architecture + - FP32 + - FP64 + - FP16 + - INT32 ADD/LOGIC/MAD + - INT8 DOT + - INT4 DOT + - FP32 GEMM + - FP64 GEMM + - FP16 GEMM + - BF16 GEMM + - INT8 GEMM + - Packed FP32 + - TF32 GEMM + - FP8/BF8 + * + - CDNA + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + * + - CDNA2 + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ❌ + - ❌ + * + - CDNA3 + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + * + - CDNA4 + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + - ❌ + - ✅ + +To best use profiling data, it's important to understand the role of various +hardware blocks of AMD Instinct accelerators. Refer to the following top level GPU architecture diagram to understand the hardware blocks of each architecture. + +.. tab-set:: + + .. tab-item:: CDNA + + .. image:: ../data/conceptual/CDNA.png + :alt: CDNA top level architecture diagram with zoomed view of Compute unit + + .. tab-item:: CDNA2 + + .. image:: ../data/conceptual/CDNA2.png + :alt: CDNA2 top level architecture diagram with zoomed view of Compute unit + + .. tab-item:: CDNA3 + + .. image:: ../data/conceptual/CDNA3.png + :alt: CDNA3 top level architecture diagram with zoomed view of Accelerator Complex Dies (XCDs) + + .. tab-item:: CDNA4 + + .. image:: ../data/conceptual/CDNA4.png + :alt: CDNA4 top level architecture diagram + +This section describes each hardware block on the accelerator as interacted with by a software developer to +give a deeper understanding of the metrics reported by profiling data. Refer to +:doc:`/tutorial/profiling-by-example` for more practical examples and details on how +to use ROCm Compute Profiler to optimize your code. + +.. _mixxx-note: + +..
note:: + + In this documentation, **MI2XX** refers to any of the CDNA2 architecture-based MI200 series accelerators such as AMD + Instinct MI250X, MI250, and MI210 accelerators interchangeably in cases + where the exact product at hand is not relevant. For product details, see `AMD Instinct GPUs `_. + + For a comparison of AMD Instinct accelerator specifications, refer to + :doc:`Hardware specifications `. + +In this chapter, the AMD Instinct performance model used by ROCm Compute Profiler is divided into a handful of +key hardware blocks, each detailed in the following sections: + +* :doc:`compute-unit` + +* :doc:`l2-cache` + +* :doc:`shader-engine` + +* :doc:`command-processor` + +* :doc:`system-speed-of-light` \ No newline at end of file diff --git a/projects/rocprofiler-compute/docs/conceptual/pipeline-descriptions.rst b/projects/rocprofiler-compute/docs/conceptual/pipeline-descriptions.rst new file mode 100644 index 0000000000..defb0a9912 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/pipeline-descriptions.rst @@ -0,0 +1,298 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: Shader engine (SE) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, pipeline, VALU, SALU, VMEM, SMEM, LDS, branch, + scheduler, MFMA, AGPRs + +********************* +Pipeline descriptions +********************* + +This section details the various execution pipelines of the +:doc:`compute unit `. + +.. _desc-valu: + +.. _desc-vmem: + +Vector arithmetic logic unit (VALU) +----------------------------------- + +The vector arithmetic logic unit (VALU) executes vector instructions +over an entire wavefront, each :ref:`work-item ` (or, +vector-lane) potentially operating on distinct data. The VALU of a CDNA™ +accelerator or GCN™ GPU typically consists of: + +* Four 16-wide SIMD processors (see :hip-training-pdf:`24` for more details). 
+ +* Four 64 or 128 KiB VGPR files (yielding a total of 256-512 KiB total + per CU), see :ref:`AGPRs ` for more detail. + +* An instruction buffer (per-SIMD) that contains execution slots for up + to 8 wavefronts (for 32 total wavefront slots on each CU). + +* A vector memory (VMEM) unit which transfers data between VGPRs and + memory; each work-item supplies its own memory address and supplies + or receives unique data. + +* CDNA accelerators, such as the MI100 and :ref:`MI2XX `, contain + additional + :amd-lab-note:`Matrix Fused Multiply-Add (MFMA) ` + units. + +To support branching and conditionals, each wavefront in the VALU +has a distinct execution mask which determines which work-items in the +wavefront are active for the currently executing instruction. When +executing a VALU instruction, inactive work-items (according to the +current execution mask of the wavefront) do not execute the instruction +and are treated as no-ops. + +.. note:: + + On GCN GPUs and the CDNA MI100 accelerator, there are slots for up to 10 + wavefronts in the instruction buffer, but generally occupancy is limited by + other factors to 32 waves per :doc:`compute unit `. + On the CDNA2 :ref:`MI2XX ` series accelerators, there are only 8 + waveslots per-SIMD. + +.. _desc-salu: + +.. _desc-smem: + +Scalar arithmetic logic unit (SALU) +----------------------------------- + +The scalar arithmetic logic unit (SALU) executes instructions that are +shared between all work-items in a wavefront. This includes control flow +such as if/else conditionals, branches and looping pointer arithmetic, loading +common values, and more. + +The SALU consists of: + +* A scalar processor capable of various arithmetic, conditional, and + comparison (etc.) operations. See + :mi200-isa-pdf:`Chapter 5. Scalar ALU Operations <35>` + of the CDNA2 Instruction Set Architecture (ISA) Reference Guide for more + detail. 
+ +* A 12.5 KiB Scalar General Purpose Register (SGPR) file + +* A scalar memory (SMEM) unit which transfers data between SGPRs and + memory + +Data loaded by the SMEM can be cached in the :ref:`scalar L1 data cache `, +and is typically only used for read-only, uniform accesses such as kernel +arguments, or HIP’s ``__constant__`` memory. + +.. _desc-lds: + +Local data share (LDS) +---------------------- + +The local data share (LDS, a.k.a., "shared memory") is fast on-CU scratchpad +that can be explicitly managed by software to effectively share data and to +coordinate between wavefronts in a workgroup. + +.. figure:: ../data/performance-model/lds.* + :align: center + :alt: Performance model of the local data share (LDS) on AMD Instinct + accelerators + :width: 800 + + Performance model of the local data share (LDS) on AMD Instinct MI-series + accelerators. + +Above is ROCm Compute Profiler's performance model of the LDS on CDNA accelerators (adapted +from :mantor-gcn-pdf:`20`). The SIMDs in the :ref:`VALU ` are +connected to the LDS in pairs (see above). Only one SIMD per pair may issue an +LDS instruction at a time, but both pairs may issue concurrently. + +On CDNA accelerators, the LDS contains 32 banks and each bank is 4B wide. +The LDS is designed such that each bank can be read from, written to, or +atomically updated every cycle, for a total throughput of 128B/clock +(:gcn-crash-course:`40`). + +On each of the two ports to the SIMDs, 64B can be sent in each direction per +cycle. So, a single wavefront, coming from one of the 2 SIMDs in a pair, can +only get back 64B/cycle (16 lanes per cycle). The input port is shared between +data and address and this can affect achieved bandwidth for different data +sizes. For example, a 64-wide store where each lane is sending a 4B value takes +8 cycles (50% peak bandwidth) while a 64-wide store where each lane is sending +a 16B value takes 20 cycles (80% peak bandwidth). 
+ +In addition, the LDS contains conflict-resolution hardware to detect and handle +bank conflicts. A bank conflict occurs when two (or more) +:ref:`work-items ` in a :ref:`wavefront ` want +to read, write, or atomically update different addresses that map to the same +bank in the same cycle. In this case, the conflict detection hardware will +determine a new schedule such that the access is split into multiple cycles with +no conflicts in any single cycle. + +When multiple work-items want to read from the same address within a bank, the +result can be efficiently broadcasted (:gcn-crash-course:`41`). Multiple +work-items writing to the same address within a bank typically results in undefined +behavior in HIP and other high-level languages, as the LDS will write the value from the +last work-item as determined by the hardware scheduler (:gcn-crash-course:`41`). +This behavior may be useful in the very specific case of storing a uniform +value. + +Relatedly, an address conflict is defined as occurring when two (or more) +work-items in a wavefront want to atomically update the same address on the same +cycle. As in a bank-conflict, this may cause additional cycles of work for the +LDS operation to complete. + +.. _desc-branch: + +Branch +------ + +The branch unit is responsible for executing jumps and branches to execute +control flow operations. +Note that Branch operations are not used for execution mask updates, but only +for “whole wavefront” control-flow changes. + +.. _desc-scheduler: + +Scheduler +--------- + +The scheduler is responsible for arbitration and issue of instructions for all +the wavefronts currently executing on the :doc:`CU `.
On every +clock cycle, the scheduler: + +* Considers waves from one of the SIMD units for execution, selected in a + round-robin fashion between the SIMDs in the compute unit + +* Issues up to one instruction per wavefront on the selected SIMD + +* Issues up to one instruction per each of the instruction categories among the waves on the selected SIMD: + + * :ref:`VALU ` + + * :ref:`VMEM ` operations + + * :ref:`SALU ` / SMEM operations + + * :ref:`LDS ` + + * :ref:`Branch ` operations + +This gives a maximum of five issued Instructions Per Cycle (IPC), per-SIMD, +per-CU (:hip-training-pdf:`Introduction to AMD GPU Programming with HIP <>`, +:gcn-crash-course:`The AMD GCN Architecture - A Crash Course <>`). On CDNA +accelerators with :ref:`MFMA ` instructions, these are issued via the +:ref:`VALU `. Some of them will execute on a separate functional unit +and typically allow other :ref:`VALU ` operations to execute in their +shadow (see the :ref:`MFMA ` section for more detail). + +.. note:: + + The IPC model used by ROCm Compute Profiler omits the following two complications for + clarity. First, CDNA accelerators contain other execution units on the CU + that are unused for compute applications. Second, so-called "internal" + instructions (see :gcn-crash-course:`29`) are not issued to a functional + unit, and can technically cause the maximum IPC to *exceed* 5 instructions + per-cycle in special (largely unrealistic) cases. The latter issue is + discussed in more detail in the + :ref:`'internal' IPC ` example. + +.. _desc-mfma: + +Matrix fused multiply-add (MFMA) +-------------------------------- + +CDNA accelerators, such as the MI100 and :ref:`MI2XX `, contain +specialized hardware to accelerate matrix-matrix multiplications, also +known as Matrix Fused Multiply-Add (MFMA) operations. The exact +operation types and supported formats may vary by accelerator. 
Refer to the +:amd-lab-note:`AMD matrix cores ` +blog post on GPUOpen for a general discussion of these hardware units. +In addition, to explore the available MFMA instructions in-depth on +various AMD accelerators (including the CDNA line), we recommend the +`AMD Matrix Instruction Calculator `_: + +.. code-block:: shell + :caption: Partial snapshot of the AMD Matrix Instruction Calculator Tool + + $ ./matrix_calculator.py --architecture cdna2 --instruction v_mfma_f32_4x4x1f32 --detail-instruction + Architecture: CDNA2 + Instruction: V_MFMA_F32_4X4X1F32 + Encoding: VOP3P-MAI + VOP3P Opcode: 0x42 + VOP3P-MAI Opcode: 0x2 + Matrix Dimensions: + M: 4 + N: 4 + K: 1 + blocks: 16 + Execution statistics: + FLOPs: 512 + Execution cycles: 8 + FLOPs/CU/cycle: 256 + Can co-execute with VALU: True + VALU co-execution cycles possible: 4 + Register usage: + GPRs required for A: 1 + GPRs required for B: 1 + GPRs required for C: 4 + GPRs required for D: 4 + GPR alignment requirement: 8 bytes + +For the purposes of ROCm Compute Profiler, the MFMA unit is typically treated as a separate +pipeline from the :ref:`VALU `, as other VALU instructions (along +with other execution pipelines such as the :ref:`SALU `) typically can be +issued during a portion of the total duration of an MFMA operation. + +.. note:: + + The exact details of VALU and MFMA operation co-execution vary by + instruction, and can be explored in more detail via the following fields in + the + `AMD Matrix Instruction Calculator's detailed instruction information `_: + + * ``Can co-execute with VALU`` + + * ``VALU co-execution cycles possible`` + + +Non-pipeline resources +---------------------- + +In this section, we describe a few resources that are not standalone +pipelines but are important for understanding performance optimization +on CDNA accelerators. + +..
_desc-barrier: + +Barrier +^^^^^^^ + +Barriers are resources on the compute-unit of a CDNA accelerator that +are used to implement synchronization primitives (for example, HIP’s +``__syncthreads``). Barriers are allocated to any workgroup that +consists of more than a single wavefront. + +.. _desc-agprs: + +Accumulation vector general-purpose registers (AGPRs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Accumulation vector general-purpose registers, or AGPRs, are special +resources that are accessible to a subset of instructions focused on +:ref:`MFMA ` operations. These registers allow the MFMA +unit to access more than the normal maximum of 256 architected +:ref:`vector general-purpose registers (VGPRs) ` by having up to 256 +in the architected space and up to 256 in the accumulation space. +Traditional VALU instructions can only use VGPRs in the architected +space, and data can be moved to/from VGPRs↔AGPRs using specialized +instructions (``v_accvgpr_*``). These data movement instructions may be +used by the compiler to implement lower-cost register-spill/fills on +architectures with AGPRs. + +AGPRs are not available on all AMD Instinct™ accelerators. GCN GPUs, +such as the AMD Instinct MI50 had a 256 KiB VGPR file. The AMD +Instinct MI100 (CDNA) has a 2x256 KiB register file, where one half +is available as general-purpose VGPRs, and the other half is for matrix +math accumulation VGPRs (AGPRs). The AMD Instinct :ref:`MI2XX ` +(CDNA2) has a 512 KiB VGPR file per CU, where each wave can dynamically request +up to 256 KiB of VGPRs and an additional 256 KiB of AGPRs. For more information, +refer to `this comment `_. diff --git a/projects/rocprofiler-compute/docs/conceptual/pipeline-metrics.rst b/projects/rocprofiler-compute/docs/conceptual/pipeline-metrics.rst new file mode 100644 index 0000000000..7c37ee846a --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/pipeline-metrics.rst @@ -0,0 +1,915 @@ +.. 
meta:: + :description: ROCm Compute Profiler performance model: Pipeline metrics + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, pipeline, wavefront, metrics, launch, runtime, + VALU, MFMA, instruction mix, FLOPs, arithmetic, operations + +**************** +Pipeline metrics +**************** + +In this section, we describe the metrics available in ROCm Compute Profiler to analyze the +pipelines discussed in the :doc:`pipeline-descriptions`. + +.. _wavefront: + +Wavefront +========= + +.. _wavefront-launch-stats: + +Wavefront launch stats +---------------------- + +The wavefront launch stats panel gives general information about the +kernel launch: + +.. list-table:: + :header-rows: 1 + :widths: 20 65 15 + + * - Metric + + - Description + + - Unit + + * - Grid Size + + - The total number of work-items (or, threads) launched as a part of + the kernel dispatch. In HIP, this is equivalent to the total grid size + multiplied by the total workgroup (or, block) size. + + - :ref:`Work-items ` + + * - Workgroup Size + + - The total number of work-items (or, threads) in each workgroup + (or, block) launched as part of the kernel dispatch. In HIP, this is + equivalent to the total block size. + + - :ref:`Work-items ` + + * - Total Wavefronts + + - The total number of wavefronts launched as part of the kernel dispatch. + On AMD Instinct™ CDNA™ accelerators and GCN™ GPUs, the wavefront size is + always 64 work-items. Thus, the total number of wavefronts should be + equivalent to the ceiling of grid size divided by 64. + + - :ref:`Wavefronts ` + + * - Saved Wavefronts + + - The total number of wavefronts saved at a context-save. See + `cwsr_enable `_. + + - :ref:`Wavefronts ` + + * - Restored Wavefronts + + - The total number of wavefronts restored from a context-save. See + `cwsr_enable `_.
+ + - :ref:`Wavefronts ` + + * - VGPRs + + - The number of architected vector general-purpose registers allocated for + the kernel, see :ref:`VALU `. Note: this may not exactly + match the number of VGPRs requested by the compiler due to allocation + granularity. + + - :ref:`VGPRs ` + + * - AGPRs + + - The number of accumulation vector general-purpose registers allocated for + the kernel, see :ref:`AGPRs `. Note: this may not exactly + match the number of AGPRs requested by the compiler due to allocation + granularity. + + - :ref:`AGPRs ` + + * - SGPRs + + - The number of scalar general-purpose registers allocated for the kernel, + see :ref:`SALU `. Note: this may not exactly match the number + of SGPRs requested by the compiler due to allocation granularity. + + - :ref:`SGPRs ` + + * - LDS Allocation + + - The number of bytes of :doc:`LDS ` memory (or, shared + memory) allocated for this kernel. Note: This may also be larger than + what was requested at compile time due to both allocation granularity and + dynamic per-dispatch LDS allocations. + + - Bytes per :ref:`workgroup ` + + * - Scratch Allocation + + - The number of bytes of :ref:`scratch memory ` requested + per work-item for this kernel. Scratch memory is used for stack memory + on the accelerator, as well as for register spills and restores. + + - Bytes per :ref:`work-item ` + +.. _wavefront-runtime-stats: + +Wavefront runtime stats +----------------------- + +The wavefront runtime statistics gives a high-level overview of the +execution of wavefronts in a kernel: + +.. list-table:: + :header-rows: 1 + :widths: 18 65 17 + + * - Metric + + - Description + + - Unit + + * - :ref:`Kernel time ` + + - The total duration of the executed kernel. Note: this should not be + directly compared to the wavefront cycles / timings below. + + - Nanoseconds + + * - :ref:`Kernel cycles ` + + - The total duration of the executed kernel in cycles. 
Note: this should + not be directly compared to the wavefront cycles / timings below. + + - Cycles + + * - Instructions per wavefront + + - The average number of instructions (of all types) executed per wavefront. + This is averaged over all wavefronts in a kernel dispatch. + + - Instructions / wavefront + + * - Wave cycles + + - The number of cycles a wavefront in the kernel dispatch spent resident on + a compute unit per :ref:`normalization unit `. This + is averaged over all wavefronts in a kernel dispatch. Note: this should + not be directly compared to the kernel cycles above. + + - Cycles per :ref:`normalization unit ` + + * - Dependency wait cycles + + - The number of cycles a wavefront in the kernel dispatch stalled waiting + on memory of any kind (e.g., instruction fetch, vector or scalar memory, + etc.) per :ref:`normalization unit `. This counter + is incremented at every cycle by *all* wavefronts on a CU stalled at a + memory operation. As such, it is most useful to get a sense of how waves + were spending their time, rather than identification of a precise limiter + because another wave could be actively executing while a wave is stalled. + The sum of this metric, Issue Wait Cycles and Active Cycles should be + equal to the total Wave Cycles metric. + + - Cycles per :ref:`normalization unit ` + + * - Issue Wait Cycles + + - The number of cycles a wavefront in the kernel dispatch was unable to + issue an instruction for any reason (e.g., execution pipe back-pressure, + arbitration loss, etc.) per + :ref:`normalization unit `. This counter is + incremented at every cycle by *all* wavefronts on a CU unable to issue an + instruction. As such, it is most useful to get a sense of how waves were + spending their time, rather than identification of a precise limiter + because another wave could be actively executing while a wave is issue + stalled. 
The sum of this metric, Dependency Wait Cycles and Active + Cycles should be equal to the total Wave Cycles metric. + + - Cycles per :ref:`normalization unit ` + + * - Active Cycles + + - The average number of cycles a wavefront in the kernel dispatch was + actively executing instructions per + :ref:`normalization unit `. This measurement is made + on a per-wavefront basis, and may include cycles that another wavefront + spent actively executing (on another execution unit, for example) or was + stalled. As such, it is most useful to get a sense of how waves were + spending their time, rather than identification of a precise limiter. The + sum of this metric, Issue Wait Cycles and Dependency Wait Cycles should be + equal to the total Wave Cycles metric. + + - Cycles per :ref:`normalization unit ` + + * - Wavefront Occupancy + + - The time-averaged number of wavefronts resident on the accelerator over + the lifetime of the kernel. Note: this metric may be inaccurate for + short-running kernels (less than 1ms). + + - :ref:`Wavefronts ` + +.. note:: + + As mentioned earlier, the measurement of kernel cycles and time typically + cannot be directly compared to, for example, wave cycles. This is due to two factors: + first, the kernel cycles/timings are measured using a counter that is + impacted by scheduling overhead, this is particularly noticeable for + "short-running" kernels (less than 1ms) where scheduling overhead forms a + significant portion of the overall kernel runtime. Secondly, the wave cycles + metric is incremented per-wavefront scheduled to a SIMD every cycle whereas + the kernel cycles counter is incremented only once per-cycle when *any* + wavefront is scheduled. + +.. _instruction-mix: + +Instruction mix +=============== + +The instruction mix panel shows a breakdown of the various types of instructions +executed by the user’s kernel, and which pipelines on the +:doc:`CU ` they were executed on.
In addition, ROCm Compute Profiler reports +further information about the breakdown of operation types for the +:ref:`VALU `, vector-memory, and :ref:`MFMA ` +instructions. + +.. note:: + + All metrics in this section count *instructions issued*, and *not* the total + number of operations executed. The values reported by these metrics will not + change regardless of the execution mask of the wavefront. Note that even if + the execution mask is identically zero (meaning that *no lanes are active*) + the instruction will still be counted, as CDNA accelerators still consider + these instructions *issued*. See + :mi200-isa-pdf:`EXECute Mask, section 3.3 of the CDNA2 ISA guide<19>` for + examples and further details. + +Overall instruction mix +----------------------- + +This panel shows the total number of each type of instruction issued to +the :doc:`various compute pipelines ` on the +:doc:`CU `. These are: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - :ref:`VALU ` instructions + + - The total number of vector arithmetic logic unit (VALU) operations + issued. These are the workhorses of the + :doc:`compute unit `, and are used to execute a wide range of + instruction types including floating point operations, non-uniform + address calculations, transcendental operations, integer operations, + shifts, conditional evaluation, etc. + + - Instructions + + * - VMEM instructions + + - The total number of vector memory operations issued. These include most + loads, stores and atomic operations and all accesses to + :ref:`generic, global, private and texture ` memory. + + - Instructions + + * - :doc:`LDS ` instructions + + - The total number of LDS (also known as shared memory) operations issued. + These include loads, stores, atomics, and HIP's ``__shfl`` operations. + + - Instructions + + * - :ref:`MFMA ` instructions + + - The total number of matrix fused multiply-add instructions issued. 
+ + - Instructions + + * - :ref:`SALU ` instructions + + - The total number of scalar arithmetic logic unit (SALU) operations + issued. Typically these are used for address calculations, literal + constants, and other operations that are *provably* uniform across a + wavefront. Although scalar memory (SMEM) operations are issued by the + SALU, they are counted separately in this section. + + - Instructions + + * - SMEM instructions + + - The total number of scalar memory (SMEM) operations issued. These are + typically used for loading kernel arguments, base-pointers and loads + from HIP's ``__constant__`` memory. + + - Instructions + + * - :ref:`Branch ` instructions + + - The total number of branch operations issued. These typically consist of + jump or branch operations and are used to implement control flow. + + - Instructions + +.. note:: + + Note, as mentioned in the :ref:`desc-branch` section: branch + operations are not used for execution mask updates, but only for "whole + wavefront" control flow changes. + +.. _valu-arith-instruction-mix: + +VALU arithmetic instruction mix +------------------------------- + +.. warning:: + + Not all metrics in this section (for instance, the floating-point instruction + breakdowns) are available on CDNA accelerators older than the + :ref:`MI2XX ` series. + +This panel details the various types of vector instructions that were +issued to the :ref:`VALU `. The metrics in this section do *not* +include :ref:`MFMA ` instructions using the same precision; for +instance, the “F16-ADD” metric does not include any 16-bit floating point +additions executed as part of an MFMA instruction using the same precision. + +.. list-table:: + :header-rows: 1 + :widths: 15 65 20 + + * - Metric + + - Description + + - Unit + + * - INT32 + + - The total number of instructions operating on 32-bit integer operands + issued to the VALU per :ref:`normalization unit `. 
+ + - Instructions per :ref:`normalization unit ` + + * - INT64 + + - The total number of instructions operating on 64-bit integer operands + issued to the VALU per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F16-ADD + + - The total number of addition instructions operating on 16-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F16-MUL + + - The total number of multiplication instructions operating on 16-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F16-FMA + + - The total number of fused multiply-add instructions operating on 16-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F16-TRANS + + - The total number of transcendental instructions (e.g., ``sqrt``) operating + on 16-bit floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F32-ADD + + - The total number of addition instructions operating on 32-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F32-MUL + + - The total number of multiplication instructions operating on 32-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F32-FMA + + - The total number of fused multiply-add instructions operating on 32-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F32-TRANS + + - The total number of transcendental instructions (such as ``sqrt``) + operating on 32-bit floating-point operands issued to the VALU per + :ref:`normalization unit `.
+ + - Instructions per :ref:`normalization unit ` + + * - F64-ADD + + - The total number of addition instructions operating on 64-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F64-MUL + + - The total number of multiplication instructions operating on 64-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F64-FMA + + - The total number of fused multiply-add instructions operating on 64-bit + floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - F64-TRANS + + - The total number of transcendental instructions (such as ``sqrt``) + operating on 64-bit floating-point operands issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Conversion + + - The total number of type conversion instructions (such as converting data + to or from F32↔F64) issued to the VALU per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + +For an example of these counters in action, refer to +:ref:`valu-arith-instruction-mix-ex`. + +.. _vmem-instruction-mix: + +VMEM instruction mix +-------------------- + +This section breaks down the types of vector memory (VMEM) instructions +that were issued. Refer to the +:ref:`Instruction Counts metrics section ` under address +processor front end of the vL1D cache for descriptions of these VMEM +instructions. + +.. _mfma-instruction-mix: + +MFMA instruction mix +-------------------- + +.. warning:: + + The metrics in this section are only available on CDNA2 + (:ref:`MI2XX `) accelerators and newer. + +This section details the types of Matrix Fused Multiply-Add +(:ref:`MFMA `) instructions that were issued.
Note that +MFMA instructions are classified by the type of input data they operate on, and +*not* the data type the result is accumulated to. + +.. list-table:: + :header-rows: 1 + :widths: 25 60 17 + + * - Metric + + - Description + + - Unit + + * - MFMA-I8 Instructions + + - The total number of 8-bit integer :ref:`MFMA ` instructions + issued per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - MFMA-F8 Instructions + + - The total number of 8-bit floating point :ref:`MFMA ` + instructions issued per :ref:`normalization unit `. This is supported in AMD Instinct MI300 series and later only. + + - Instructions per :ref:`normalization unit ` + + * - MFMA-F16 Instructions + + - The total number of 16-bit floating point :ref:`MFMA ` + instructions issued per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - MFMA-BF16 Instructions + + - The total number of 16-bit brain floating point :ref:`MFMA ` + instructions issued per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - MFMA-F32 Instructions + + - The total number of 32-bit floating-point :ref:`MFMA ` + instructions issued per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - MFMA-F64 Instructions + + - The total number of 64-bit floating-point :ref:`MFMA ` + instructions issued per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + +Compute pipeline +================ + +.. _metrics-flop-count: + +FLOP counting conventions +------------------------- + +ROCm Compute Profiler’s conventions for VALU FLOP counting are as follows: + +* Addition or multiplication: 1 operation + +* Transcendentals: 1 operation + +* Fused multiply-add (FMA): 2 operations + +Integer operations (IOPs) do not use this convention. They are counted +as a single operation regardless of the instruction type. + +.. 
note:: + + Packed operations which operate on multiple operands in the same instruction + are counted identically to the underlying instruction type. For example, the + ``v_pk_add_f32`` instruction on :ref:`MI2XX `, which performs an + add operation on two pairs of aligned 32-bit floating-point operands is + counted only as a single addition -- that is, 1 operation. + +As discussed in the :ref:`instruction-mix` section, the FLOP/IOP +metrics in this section do not take into account the execution mask of +the operation, and will report the same value even if the execution mask +is identically zero. + +For example, a FMA instruction operating on 32-bit floating-point +operands (such as ``v_fma_f32`` on a :ref:`MI2XX ` accelerator) +would be counted as 128 total FLOPs: 2 operations (due to the +instruction type) multiplied by 64 operations (because the wavefront is +composed of 64 work-items). + +.. _compute-speed-of-light: + +Compute Speed-of-Light +---------------------- + +.. warning:: + + The theoretical maximum throughput for some metrics in this section are + currently computed with the maximum achievable clock frequency, as reported + by ``rocminfo``, for an accelerator. This may not be realistic for all + workloads. + +This section reports the number of floating-point and integer operations +executed on the :ref:`VALU ` and :ref:`MFMA ` units in +various precisions. We note that unlike the +:ref:`VALU instruction mix ` and +:ref:`MFMA instruction mix ` sections, the metrics here +are reported as FLOPs and IOPs, that is, the total number of operations +executed. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - VALU FLOPs + + - The total floating-point operations executed per second on the + :ref:`VALU `. This is also presented as a percent of the peak + theoretical FLOPs achievable on the specific accelerator. Note: this does + not include any floating-point operations from :ref:`MFMA ` + instructions. 
+ + - GFLOPs + + * - VALU IOPs + + - The total integer operations executed per second on the + :ref:`VALU `. This is also presented as a percent of the peak + theoretical IOPs achievable on the specific accelerator. Note: this does + not include any integer operations from :ref:`MFMA ` + instructions. + + - GIOPs + + * - MFMA FLOPs (BF16) + + - The total number of 16-bit brain floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 16-bit + brain floating point operations from :ref:`VALU ` + instructions. This is also presented as a percent of the peak theoretical + BF16 MFMA operations achievable on the specific accelerator. + + - GFLOPs + + * - MFMA FLOPs (F16) + + - The total number of 16-bit floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 16-bit + floating point operations from :ref:`VALU ` instructions. This + is also presented as a percent of the peak theoretical F16 MFMA + operations achievable on the specific accelerator. + + - GFLOPs + + * - MFMA FLOPs (F32) + + - The total number of 32-bit floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 32-bit + floating point operations from :ref:`VALU ` instructions. This + is also presented as a percent of the peak theoretical F32 MFMA + operations achievable on the specific accelerator. + + - GFLOPs + + * - MFMA FLOPs (F64) + + - The total number of 64-bit floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 64-bit + floating point operations from :ref:`VALU ` instructions. This + is also presented as a percent of the peak theoretical F64 MFMA + operations achievable on the specific accelerator. + + - GFLOPs + + * - MFMA IOPs (INT8) + + - The total number of 8-bit integer :ref:`MFMA ` operations + executed per second. Note: this does not include any 8-bit integer + operations from :ref:`VALU ` instructions. 
This is also + presented as a percent of the peak theoretical INT8 MFMA operations + achievable on the specific accelerator. + + - GIOPs + +.. _pipeline-stats: + +Pipeline statistics +------------------- + +This section reports a number of key performance characteristics of +various execution units on the :doc:`CU `. Refer to +:ref:`ipc-example` for a detailed dive into these metrics, and the +:ref:`scheduler ` section for a high-level overview of execution +units and instruction issue. + +.. list-table:: + :header-rows: 1 + :widths: 20 65 15 + + * - Metric + + - Description + + - Unit + + * - IPC + + - The ratio of the total number of instructions executed on the + :doc:`CU ` over the + :ref:`total active CU cycles `. + + - Instructions per-cycle + + * - IPC (Issued) + + - The ratio of the total number of + (non-:ref:`internal `) instructions issued over + the number of cycles where the :ref:`scheduler ` was + actively working on issuing instructions. Refer to the + :ref:`Issued IPC ` example for further detail. + + - Instructions per-cycle + + * - SALU utilization + + - Indicates what percent of the kernel's duration the + :ref:`SALU ` was busy executing instructions. Computed as the + ratio of the total number of cycles spent by the + :ref:`scheduler ` issuing SALU / :ref:`SMEM ` + instructions over the :ref:`total CU cycles `. + + - Percent + + * - VALU utilization + + - Indicates what percent of the kernel's duration the + :ref:`VALU ` was busy executing instructions. Does not include + :ref:`VMEM ` operations. Computed as the ratio of the total + number of cycles spent by the :ref:`scheduler ` issuing + VALU instructions over the :ref:`total CU cycles `. + + - Percent + + * - VMEM utilization + + - Indicates what percent of the kernel's duration the + :ref:`VMEM ` unit was busy executing instructions, including + both global/generic and spill/scratch operations (see the + :ref:`VMEM instruction count metrics ` for more + detail).
Does not include :ref:`VALU ` operations. Computed + as the ratio of the total number of cycles spent by the + :ref:`scheduler ` issuing VMEM instructions over the + :ref:`total CU cycles `. + + - Percent + + * - Branch utilization + + - Indicates what percent of the kernel's duration the + :ref:`branch ` unit was busy executing instructions. + Computed as the ratio of the total number of cycles spent by the + :ref:`scheduler ` issuing branch instructions over the + :ref:`total CU cycles `. + + - Percent + + * - VALU active threads + + - Indicates the average level of :ref:`divergence ` within + a wavefront over the lifetime of the kernel. The number of work-items + that were active in a wavefront during execution of each + :ref:`VALU ` instruction, time-averaged over all VALU + instructions run on all wavefronts in the kernel. + + - Work-items + + * - MFMA utilization + + - Indicates what percent of the kernel's duration the + :ref:`MFMA ` unit was busy executing instructions. Computed as + the ratio of the total number of cycles the + :ref:`MFMA ` unit was busy over the + :ref:`total CU cycles `. + + - Percent + + * - MFMA instruction cycles + + - The average duration of :ref:`MFMA ` instructions in this + kernel in cycles. Computed as the ratio of the total number of cycles the + MFMA unit was busy over the total number of MFMA instructions. Compare + to, for example, the + `AMD Matrix Instruction Calculator `_. + + - Cycles per instruction + + * - VMEM latency + + - The average number of round-trip cycles (that is, from issue to data + return / acknowledgment) required for a VMEM instruction to complete. + + - Cycles + + * - SMEM latency + + - The average number of round-trip cycles (that is, from issue to data + return / acknowledgment) required for a SMEM instruction to complete. + + - Cycles + +..
note:: + + The branch utilization reported in this section also includes time spent in + other instruction types (namely: ``s_endpgm``) that are *typically* a very + small percentage of the overall kernel execution. This complication is + omitted for simplicity, but may result in small amounts of branch utilization + (typically less than 1%) for otherwise branch-less kernels. + +.. _arithmetic-operations: + +Arithmetic operations +--------------------- + +This section reports the total number of floating-point and integer +operations executed in various precisions. Unlike the +:ref:`compute-speed-of-light` panel, this section reports both +:ref:`VALU ` and :ref:`MFMA ` operations of the same precision +(e.g., F32) in the same metric. Additionally, this panel lets the user +control how the data is normalized (i.e., control the +:ref:`normalization unit `), while the speed-of-light panel does +not. For more detail on how operations are counted see the +:ref:`FLOP counting convention ` section. + +.. warning:: + + As discussed in :ref:`instruction-mix`, the metrics in this section do not + take into account the execution mask of the operation, and will report the + same value even if EXEC is identically zero. + +.. list-table:: + :header-rows: 1 + :widths: 18 65 17 + + * - Metric + + - Description + + - Unit + + * - FLOPs (Total) + + - The total number of floating-point operations executed on either the + :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. + + - FLOP per :ref:`normalization unit ` + + * - IOPs (Total) + + - The total number of integer operations executed on either the + :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. + + - IOP per :ref:`normalization unit ` + + * - F16 OPs + + - The total number of 16-bit floating-point operations executed on either the + :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. 
+ + - FLOP per :ref:`normalization unit ` + + * - BF16 OPs + + - The total number of 16-bit brain floating-point operations executed on either the + :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. Note: on current CDNA + accelerators, the VALU has no native BF16 instructions. + + - FLOP per :ref:`normalization unit ` + + * - F32 OPs + + - The total number of 32-bit floating-point operations executed on either + the :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. + + - FLOP per :ref:`normalization unit ` + + * - F64 OPs + + - The total number of 64-bit floating-point operations executed on either + the :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. + + - FLOP per :ref:`normalization unit ` + + * - INT8 OPs + + - The total number of 8-bit integer operations executed on either the + :ref:`VALU ` or :ref:`MFMA ` units, per + :ref:`normalization unit `. Note: on current CDNA + accelerators, the VALU has no native INT8 instructions. + + - IOPs per :ref:`normalization unit ` diff --git a/projects/rocprofiler-compute/docs/conceptual/references.rst b/projects/rocprofiler-compute/docs/conceptual/references.rst new file mode 100644 index 0000000000..4ed88dd86c --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/references.rst @@ -0,0 +1,26 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: References + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, HIP, GCN, LLVM, docs, documentation, training + +********** +References +********** + +Some sections in :doc:`this chapter ` cite the +following publicly available documentation. 
+ +* :hip-training-pdf:`Introduction to AMD GPU Programming with HIP <>` + +* :mi200-isa-pdf:`CDNA2 ISA Reference Guide <>` + +* :cdna2-white-paper:`CDNA2 white paper <>` + +* :hsa-runtime-pdf:`HSA Runtime Programmer's Reference Manual <>` + +* :gcn-crash-course:`The AMD GCN Architecture - A Crash Course (Layla Mah) <>` + +* :mantor-gcn-pdf:`AMD Radeon HD7970 with GCN Architecture <>` + +* :mantor-vega10-pdf:`AMD Radeon Next Generation GPU Architecture - Vega10 <>` + +* :llvm-docs:`LLVM User Guide for AMDGPU Backend <>` diff --git a/projects/rocprofiler-compute/docs/conceptual/shader-engine.rst b/projects/rocprofiler-compute/docs/conceptual/shader-engine.rst new file mode 100644 index 0000000000..350ea4624f --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/shader-engine.rst @@ -0,0 +1,706 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: Shader engine (SE) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, shader, engine, sL1D, L1I, workgroup manager, SPI + +****************** +Shader engine (SE) +****************** + +The :doc:`compute units ` on a CDNA™ accelerator are grouped +together into a higher-level organizational unit called a shader engine (SE): + +.. figure:: ../data/performance-model/selayout.png + :align: center + :alt: Example of CU-grouping into shader engines + :width: 800 + + Example of CU-grouping into shader engines on AMD Instinct MI-series + accelerators. + +The number of CUs on a SE varies from chip to chip -- see for example +:hip-training-pdf:`20`. In addition, newer accelerators such as the AMD +Instinct™ MI 250X have 8 SEs per accelerator. + +For the purposes of ROCm Compute Profiler, we consider resources that are shared between +multiple CUs on a single SE as part of the SE's metrics. + +These include: + +* The :ref:`scalar L1 data cache ` + +* The :ref:`L1 instruction cache ` + +* The :ref:`workgroup manager ` + +.. 
_desc-sl1d: + +Scalar L1 data cache (sL1D) +=========================== + +The Scalar L1 Data cache (sL1D) can cache data accessed from scalar load +instructions (and scalar store instructions on architectures where they exist) +from wavefronts in the :doc:`CUs `. The sL1D is shared between +multiple CUs (:gcn-crash-course:`36`) -- the exact number of CUs depends on the +architecture in question (3 CUs in GCN™ GPUs and MI100, 2 CUs in +:ref:`MI2XX `) -- and is backed by the :doc:`L2 cache `. + +In typical usage, the data in the sL1D is comprised of: + +* Kernel arguments, such as pointers, + `non-populated `_ + grid and block dimensions, and others + +* HIP's ``__constant__`` memory, when accessed in a provably uniform manner + [#uniform-access]_ + +* Other memory, when accessed in a provably uniform manner, *and* the backing + memory is provably constant [#uniform-access]_ + +.. _desc-sl1d-sol: + +Scalar L1D Speed-of-Light +------------------------- + +.. warning:: + + The theoretical maximum throughput for some metrics in this section are + currently computed with the maximum achievable clock frequency, as reported + by ``rocminfo``, for an accelerator. This may not be realistic for all + workloads. + +The Scalar L1D speed-of-light chart shows some key metrics of the sL1D +cache as a comparison with the peak achievable values of those metrics: + +.. list-table:: + :header-rows: 1 + :widths: 20 65 15 + + * - Metric + + - Description + + - Unit + + * - Bandwidth + + - The number of bytes looked up in the sL1D cache, as a percent of the peak + theoretical bandwidth. Calculated as the ratio of sL1D requests over the + :ref:`total sL1D cycles `. + + - Percent + + * - Cache Hit Rate + + - The percent of sL1D requests that hit [#sl1d-cache]_ on a previously + loaded line in the cache. Calculated as the ratio of the number of sL1D + requests that hit over the number of all sL1D requests. 
+ + - Percent + + * - sL1D-L2 BW + + - The number of bytes requested by the sL1D from the L2 cache, as a percent + of the peak theoretical sL1D → L2 cache bandwidth. Calculated as the + ratio of the total number of requests from the sL1D to the L2 cache over + the :ref:`total sL1D-L2 interface cycles `. + + - Percent + +.. _desc-sl1d-stats: + +Scalar L1D cache accesses +------------------------- + +This panel gives more detail on the types of accesses made to the sL1D, +and the hit/miss statistics. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Requests + + - The total number of requests, of any size or type, made to the sL1D per + :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Hits + + - The total number of sL1D requests that hit on a previously loaded cache + line, per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Misses - Non Duplicated + + - The total number of sL1D requests that missed on a cache line that *was + not* already pending due to another request, per + :ref:`normalization unit `. See :ref:`desc-sl1d-sol` + for more detail. + + - Requests per :ref:`normalization unit ` + + * - Misses - Duplicated + + - The total number of sL1D requests that missed on a cache line that *was* + already pending due to another request, per + :ref:`normalization unit `. See + :ref:`desc-sl1d-sol` for more detail. + + - Requests per :ref:`normalization unit ` + + * - Cache Hit Rate + + - Indicates the percent of sL1D requests that hit on a previously loaded + line in the cache. The ratio of the number of sL1D requests that hit + [#sl1d-cache]_ over the number of all sL1D requests. + + - Percent + + * - Read Requests (Total) + + - The total number of sL1D read requests of any size, per + :ref:`normalization unit `.
+ + - Requests per :ref:`normalization unit ` + + * - Atomic Requests + + - The total number of sL1D atomic requests of any size, per + :ref:`normalization unit `. Typically unused on CDNA + accelerators. + + - Requests per :ref:`normalization unit ` + + * - Read Requests (1 DWord) + + - The total number of sL1D read requests made for a single dword of data + (4B), per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Read Requests (2 DWord) + + - The total number of sL1D read requests made for two dwords of data + (8B), per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Read Requests (4 DWord) + + - The total number of sL1D read requests made for four dwords of data + (16B), per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Read Requests (8 DWord) + + - The total number of sL1D read requests made for eight dwords of data + (32B), per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Read Requests (16 DWord) + + - The total number of sL1D read requests made for sixteen dwords of data + (64B), per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + +.. _desc-sl1d-l2-interface: + +sL1D ↔ L2 Interface +------------------- + +This panel gives more detail on the data requested across the +sL1D↔ +:doc:`L2 ` interface. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - sL1D-L2 BW + + - The total number of bytes read from, written to, or atomically updated + across the sL1D↔:doc:`L2 ` interface, per + :ref:`normalization unit `. Note that sL1D writes + and atomics are typically unused on current CDNA accelerators, so in the + majority of cases this can be interpreted as an sL1D→L2 read bandwidth. + + - Bytes per :ref:`normalization unit ` + + * - Read Requests + + - The total number of read requests from sL1D to the :doc:`L2 `, + per :ref:`normalization unit `.
+ + - Requests per :ref:`normalization unit ` + + * - Write Requests + + - The total number of write requests from sL1D to the :doc:`L2 `, + per :ref:`normalization unit `. Typically unused on + current CDNA accelerators. + + - Requests per :ref:`normalization unit ` + + * - Atomic Requests + + - The total number of atomic requests from sL1D to the + :doc:`L2 `, per + :ref:`normalization unit `. Typically unused on + current CDNA accelerators. + + - Requests per :ref:`normalization unit ` + + * - Stall Cycles + + - The total number of cycles the sL1D↔ + :doc:`L2 ` interface was stalled, per + :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + +.. rubric:: Footnotes + +.. [#uniform-access] The scalar data cache is used when the compiler emits + scalar loads to access data. This requires that the data be *provably* + uniformly accessed (that is, the compiler can verify that all work-items in a + wavefront access the same data), *and* that the data can be proven to be + read-only (for instance, HIP's ``__constant__`` memory, or properly + ``__restrict__``\ed pointers to avoid write-aliasing). Access of + ``__constant__`` memory for example is not guaranteed to go through the sL1D + if the wavefront loads a non-uniform value. + +.. [#sl1d-cache] Unlike the :doc:`vL1D ` and + :doc:`L2 ` caches, the sL1D cache on AMD Instinct MI-series CDNA + accelerators does *not* use the "hit-on-miss" approach to reporting cache + hits. That is, if while satisfying a miss, another request comes in that + would hit on the same pending cache line, the subsequent request will be + counted as a *duplicated miss*. + +.. _desc-l1i: + +L1 Instruction Cache (L1I) +========================== + +As with the :ref:`sL1D `, the L1 Instruction (L1I) cache is shared +between multiple CUs on a shader-engine, where the precise number of CUs +sharing a L1I depends on the architecture in question (:gcn-crash-course:`36`) +and is backed by the :doc:`L2 cache `.
Unlike the sL1D, the +instruction cache is read-only. + +.. _desc-l1i-sol: + +L1I Speed-of-Light +------------------ + +.. warning:: + + The theoretical maximum throughput for some metrics in this section are + currently computed with the maximum achievable clock frequency, as reported + by ``rocminfo``, for an accelerator. This may not be realistic for all + workloads. + +The L1 Instruction Cache speed-of-light chart shows some key metrics of +the L1I cache as a comparison with the peak achievable values of those +metrics: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Bandwidth + + - The number of bytes looked up in the L1I cache, as a percent of the peak + theoretical bandwidth. Calculated as the ratio of L1I requests over the + :ref:`total L1I cycles `. + + - Percent + + * - Cache Hit Rate + + - The percent of L1I requests that hit on a previously loaded line in the + cache. Calculated as the ratio of the number of L1I requests that hit + [#l1i-cache]_ over the number of all L1I requests. + + - Percent + + * - L1I-L2 BW + + - The percent of the peak theoretical L1I → L2 cache request bandwidth + achieved. Calculated as the ratio of the total number of requests from + the L1I to the L2 cache over the + :ref:`total L1I-L2 interface cycles `. + + - Percent + + * - Instruction Fetch Latency + + - The average number of cycles spent to fetch instructions to a + :doc:`CU `. + + - Cycles + +.. _desc-l1i-stats: + +L1I cache accesses +------------------ + +This panel gives more detail on the hit/miss statistics of the L1I: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Requests + + - The total number of requests made to the L1I per + :ref:`normalization-unit `. + + - Requests per :ref:`normalization unit `. + + * - Hits + + - The total number of L1I requests that hit on a previously loaded cache + line, per :ref:`normalization-unit `.
+ + - Requests per :ref:`normalization unit ` + + * - Misses - Non Duplicated + + - The total number of L1I requests that missed on a cache line that + *was not* already pending due to another request, per + :ref:`normalization-unit `. See note in + :ref:`desc-l1i-sol` for more detail. + + - Requests per :ref:`normalization unit `. + + * - Misses - Duplicated + + - The total number of L1I requests that missed on a cache line that *was* + already pending due to another request, per + :ref:`normalization-unit `. See note in + :ref:`desc-l1i-sol` for more detail. + + - Requests per :ref:`normalization unit ` + + * - Cache Hit Rate + + - The percent of L1I requests that hit [#l1i-cache]_ on a previously loaded + line in the cache. Calculated as the ratio of the number of L1I requests + that hit over the number of all L1I requests. + + - Percent + +L1I - L2 interface +------------------ + +This panel gives more detail on the data requested across the +L1I-:doc:`L2 ` interface. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - L1I-L2 BW + + - The total number of bytes read across the L1I-:doc:`L2 ` + interface, per :ref:`normalization unit `. + + - Bytes per :ref:`normalization unit ` + +.. rubric:: Footnotes + +.. [#l1i-cache] Unlike the :doc:`vL1D ` and + :doc:`L2 ` caches, the L1I cache on AMD Instinct MI-series CDNA + accelerators does *not* use the "hit-on-miss" approach to reporting cache + hits. That is, if while satisfying a miss, another request comes in that + would hit on the same pending cache line, the subsequent request will be + counted as a *duplicated miss*. + +.. _desc-spi: + +Workgroup manager (SPI) +======================= + +The workgroup manager (SPI) is the bridge between the +:doc:`command processor ` and the +:doc:`compute units `.
After the command processor processes a +kernel dispatch, it will then pass the dispatch off to the workgroup manager, +which then schedules :ref:`workgroups ` onto the compute units. +As workgroups complete execution and resources become available, the +workgroup manager will schedule new workgroups onto compute units. The workgroup +manager’s metrics therefore are focused on reporting the following: + +* Utilizations of various parts of the accelerator that the workgroup + manager interacts with (and the workgroup manager itself) + +* How many workgroups were dispatched, their size, and how many + resources they used + +* Percent of scheduler opportunities (cycles) where workgroups failed + to dispatch, and + +* Percent of scheduler opportunities (cycles) where workgroups failed + to dispatch due to lack of a specific resource on the CUs (for instance, too + many VGPRs allocated) + +This gives you an idea of why the workgroup manager couldn’t schedule more +wavefronts onto the device, and is most useful for workloads that you suspect to +be limited by scheduling or launch rate. + +As discussed in :doc:`Command processor `, the command +processor on AMD Instinct MI-series architectures contains four hardware +scheduler-pipes, each with eight software threads (:mantor-vega10-pdf:`19`). Each +scheduler-pipe can issue a kernel dispatch to the workgroup manager to schedule +concurrently. Therefore, some workgroup manager metrics are presented relative +to the utilization of these scheduler-pipes (for instance, whether all four are +issuing concurrently). + +.. note:: + + Current versions of the profiling libraries underlying ROCm Compute Profiler attempt to + serialize concurrent kernels running on the accelerator, as the performance + counters on the device are global (that is, shared between concurrent + kernels). This means that these scheduler-pipe utilization metrics are + expected to reach (for example) a maximum of one pipe active -- only 25%. 
+ +Workgroup manager utilizations +------------------------------ + +This section describes the utilization of the workgroup manager, and the +hardware components it interacts with. + +.. list-table:: + :header-rows: 1 + :widths: 20 65 15 + + * - Metric + + - Description + + - Unit + + * - Accelerator utilization + + - The percent of cycles in the kernel where the accelerator was actively + doing any work. + + - Percent + + * - Scheduler-pipe utilization + + - The percent of :ref:`total scheduler-pipe cycles ` in + the kernel where the scheduler-pipes were actively doing any work. Note: + this value is expected to range between 0% and 25%. See :ref:`desc-spi`. + + - Percent + + * - Workgroup manager utilization + + - The percent of cycles in the kernel where the workgroup manager was + actively doing any work. + + - Percent + + * - Shader engine utilization + + - The percent of :ref:`total shader engine cycles ` in the + kernel where any CU in a shader-engine was actively doing any work, + normalized over all shader-engines. Low values (e.g., << 100%) indicate + that the accelerator was not fully saturated by the kernel, or a + potential load-imbalance issue. + + - Percent + + * - SIMD utilization + + - The percent of :ref:`total SIMD cycles ` in the kernel + where any :ref:`SIMD ` on a CU was actively doing any work, + summed over all CUs. Low values (less than 100%) indicate that the + accelerator was not fully saturated by the kernel, or a potential + load-imbalance issue. + + - Percent + + * - Dispatched workgroups + + - The total number of workgroups forming this kernel launch. + + - Workgroups + + * - Dispatched wavefronts + + - The total number of wavefronts, summed over all workgroups, forming this + kernel launch. + + - Wavefronts + + * - VGPR writes + + - The average number of cycles spent initializing :ref:`VGPRs ` + at wave creation. 
+ + - Cycles/wave + + * - SGPR Writes + + - The average number of cycles spent initializing :ref:`SGPRs ` + at wave creation. + + - Cycles/wave + +Resource allocation +------------------- + +This panel gives more detail on how workgroups and wavefronts were scheduled +onto compute units, and what occupancy limiters they hit -- if any. When +analyzing these metrics, you should also take into account their +achieved occupancy -- such as +:ref:`wavefront occupancy `. A kernel may be occupancy +limited by LDS usage, for example, but may still achieve high occupancy levels +such that improving occupancy further may not improve performance. See +:ref:`occupancy-example` for details. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Not-scheduled rate (Workgroup Manager) + + - The percent of :ref:`total scheduler-pipe cycles ` in + the kernel where a workgroup could not be scheduled to a + :doc:`CU ` due to a bottleneck within the workgroup manager + rather than a lack of a CU or :ref:`SIMD ` with sufficient + resources. Note: this value is expected to range between 0-25%. See note + in :ref:`workgroup manager ` description. + + - Percent + + * - Not-scheduled rate (Scheduler-Pipe) + + - The percent of :ref:`total scheduler-pipe cycles ` in + the kernel where a workgroup could not be scheduled to a + :doc:`CU ` due to a bottleneck within the scheduler-pipes + rather than a lack of a CU or :ref:`SIMD ` with sufficient + resources. Note: this value is expected to range between 0-25%, see note + in :ref:`workgroup manager ` description. + + - Percent + + * - Scheduler-Pipe Stall Rate + + - The percent of :ref:`total scheduler-pipe cycles ` in + the kernel where a workgroup could not be scheduled to a + :doc:`CU ` due to occupancy limitations (like a lack of a + CU or :ref:`SIMD ` with sufficient resources). Note: this + value is expected to range between 0-25%, see note in + :ref:`workgroup manager ` description. 
+ + - Percent + + * - Scratch Stall Rate + + - The percent of :ref:`total shader-engine cycles ` in the + kernel where a workgroup could not be scheduled to a + :doc:`CU ` due to lack of + :ref:`private (a.k.a., scratch) memory ` slots. While this + can reach up to 100%, note that the actual occupancy limitations on a + kernel using private memory are typically quite small (for example, less + than 1% of the total number of waves that can be scheduled to an + accelerator). + + - Percent + + * - Insufficient SIMD Waveslots + + - The percent of :ref:`total SIMD cycles ` in the kernel + where a workgroup could not be scheduled to a :ref:`SIMD ` + due to lack of available :ref:`waveslots `. + + - Percent + + * - Insufficient SIMD VGPRs + + - The percent of :ref:`total SIMD cycles ` in the kernel + where a workgroup could not be scheduled to a :ref:`SIMD ` + due to lack of available :ref:`VGPRs `. + + - Percent + + * - Insufficient SIMD SGPRs + + - The percent of :ref:`total SIMD cycles ` in the kernel + where a workgroup could not be scheduled to a :ref:`SIMD ` + due to lack of available :ref:`SGPRs `. + + - Percent + + * - Insufficient CU LDS + + - The percent of :ref:`total CU cycles ` in the kernel + where a workgroup could not be scheduled to a :doc:`CU ` + due to lack of available :doc:`LDS `. + + - Percent + + * - Insufficient CU Barriers + + - The percent of :ref:`total CU cycles ` in the kernel + where a workgroup could not be scheduled to a :doc:`CU ` + due to lack of available :ref:`barriers `. + + - Percent + + * - Reached CU Workgroup Limit + + - The percent of :ref:`total CU cycles ` in the kernel + where a workgroup could not be scheduled to a :doc:`CU ` + due to limits within the workgroup manager. This is expected to + always be zero on CDNA2 or newer accelerators (and small for previous + accelerators).
+ + - Percent + + * - Reached CU Wavefront Limit + + - The percent of :ref:`total CU cycles ` in the kernel + where a wavefront could not be scheduled to a :doc:`CU ` + due to limits within the workgroup manager. This is expected to + always be zero on CDNA2 or newer accelerators (and small for previous + accelerators). + + - Percent diff --git a/projects/rocprofiler-compute/docs/conceptual/system-speed-of-light.rst b/projects/rocprofiler-compute/docs/conceptual/system-speed-of-light.rst new file mode 100644 index 0000000000..5652a5e3f9 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/system-speed-of-light.rst @@ -0,0 +1,327 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: System Speed-of-Light + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, system, speed of light + +********************* +System Speed-of-Light +********************* + +System Speed-of-Light summarizes some of the key metrics from various sections +of ROCm Compute Profiler’s profiling report. + +.. warning:: + + The theoretical maximum throughput for some metrics in this section is + currently computed with the maximum achievable clock frequency, as reported + by ``rocminfo``, for an accelerator. This may not be realistic for + all workloads. + + Also, not all metrics -- such as FLOP counters -- are available on all AMD + Instinct™ MI-series accelerators. For more detail on how operations are + counted, see the :ref:`metrics-flop-count` section. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - :ref:`VALU ` FLOPs + + - The total floating-point operations executed per second on the + :ref:`VALU `. This is also presented as a percent of the peak + theoretical FLOPs achievable on the specific accelerator. Note: this does + not include any floating-point operations from :ref:`MFMA ` + instructions.
+ + - GFLOPs + + * - :ref:`VALU ` IOPs + + - The total integer operations executed per second on the + :ref:`VALU `. This is also presented as a percent of the peak + theoretical IOPs achievable on the specific accelerator. Note: this does + not include any integer operations from :ref:`MFMA ` + instructions. + + - GIOPs + + * - :ref:`MFMA ` FLOPs (F8) + + - The total number of 8-bit floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 8-bit + floating point operations from :ref:`VALU ` + instructions. This is also presented as a percent of the peak theoretical + F8 MFMA operations achievable on the specific accelerator. This metric is supported only on AMD Instinct MI300 series and later accelerators. + + - GFLOPs + + * - :ref:`MFMA ` FLOPs (BF16) + + - The total number of 16-bit brain floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 16-bit + brain floating point operations from :ref:`VALU ` + instructions. This is also presented as a percent of the peak theoretical + BF16 MFMA operations achievable on the specific accelerator. + + - GFLOPs + + * - :ref:`MFMA ` FLOPs (F16) + + - The total number of 16-bit floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 16-bit + floating point operations from :ref:`VALU ` instructions. This + is also presented as a percent of the peak theoretical F16 MFMA + operations achievable on the specific accelerator. + + - GFLOPs + + * - :ref:`MFMA ` FLOPs (F32) + + - The total number of 32-bit floating point :ref:`MFMA ` + operations executed per second. Note: this does not include any 32-bit + floating point operations from :ref:`VALU ` instructions. This + is also presented as a percent of the peak theoretical F32 MFMA + operations achievable on the specific accelerator. + + - GFLOPs + + * - :ref:`MFMA ` FLOPs (F64) + + - The total number of 64-bit floating point :ref:`MFMA ` + operations executed per second.
Note: this does not include any 64-bit + floating point operations from :ref:`VALU ` instructions. This + is also presented as a percent of the peak theoretical F64 MFMA + operations achievable on the specific accelerator. + + - GFLOPs + + * - :ref:`MFMA ` IOPs (INT8) + + - The total number of 8-bit integer :ref:`MFMA ` operations + executed per second. Note: this does not include any 8-bit integer + operations from :ref:`VALU ` instructions. This is also + presented as a percent of the peak theoretical INT8 MFMA operations + achievable on the specific accelerator. + + - GIOPs + + * - :ref:`SALU ` utilization + + - Indicates what percent of the kernel's duration the + :ref:`SALU ` was busy executing instructions. Computed as the + ratio of the total number of cycles spent by the + :ref:`scheduler ` issuing :ref:`SALU ` or + :ref:`SMEM ` instructions over the + :ref:`total CU cycles `. + + - Percent + + * - :ref:`VALU ` utilization + + - Indicates what percent of the kernel's duration the + :ref:`VALU ` was busy executing instructions. Does not include + :ref:`VMEM ` operations. Computed as the ratio of the total + number of cycles spent by the :ref:`scheduler ` issuing + :ref:`VALU ` instructions over the + :ref:`total CU cycles `. + + - Percent + + * - :ref:`MFMA ` utilization + + - Indicates what percent of the kernel's duration the + :ref:`MFMA ` unit was busy executing instructions. Computed as + the ratio of the total number of cycles the MFMA was busy over the + :ref:`total CU cycles `. + + - Percent + + * - :ref:`VMEM ` utilization + + - Indicates what percent of the kernel's duration the + :ref:`VMEM ` unit was busy executing instructions, including + both global/generic and spill/scratch operations (see the + :ref:`VMEM instruction count metrics ` for more + detail). Does not include :ref:`VALU ` operations. Computed as + the ratio of the total number of cycles spent by the + :ref:`scheduler ` issuing VMEM instructions over the + :ref:`total CU cycles `.
+ + - Percent + + * - :ref:`Branch ` utilization + + - Indicates what percent of the kernel's duration the + :ref:`branch ` unit was busy executing instructions. + Computed as the ratio of the total number of cycles spent by the + :ref:`scheduler ` issuing :ref:`branch ` + instructions over the :ref:`total CU cycles `. + + - Percent + + * - :ref:`VALU ` active threads + + - Indicates the average level of :ref:`divergence ` within + a wavefront over the lifetime of the kernel. The number of work-items + that were active in a wavefront during execution of each + :ref:`VALU ` instruction, time-averaged over all VALU + instructions run on all wavefronts in the kernel. + + - Work-items + + * - IPC + + - The ratio of the total number of instructions executed on the + :doc:`CU ` over the + :ref:`total active CU cycles `. This is also + presented as a percent of the peak theoretical IPC achievable on + the specific accelerator. + + - Instructions per-cycle + + * - Wavefront occupancy + + - The time-averaged number of wavefronts resident on the accelerator over + the lifetime of the kernel. Note: this metric may be inaccurate for + short-running kernels (less than 1 ms). This is also presented as a + percent of the peak theoretical occupancy achievable on the specific + accelerator. + + - Wavefronts + + * - :doc:`LDS ` theoretical bandwidth + + - Indicates the maximum number of bytes that could have been loaded from, + stored to, or atomically updated in the LDS per unit time (see + :ref:`LDS Bandwidth ` example for more detail). This is + also presented as a percent of the peak theoretical LDS bandwidth + achievable on the specific accelerator. + + - GB/s + + * - :doc:`LDS ` bank conflicts/access + + - The ratio of the number of cycles spent in the + :doc:`LDS scheduler ` due to bank conflicts (as + determined by the conflict resolution hardware) to the base number of + cycles that would be spent in the LDS scheduler in a completely + uncontended case.
This is also presented in normalized form (i.e., the + Bank Conflict Rate). + + - Conflicts/Access + + * - :doc:`vL1D ` cache hit rate + + - The ratio of the number of vL1D cache line requests that hit in vL1D + cache over the total number of cache line requests to the + :ref:`vL1D cache RAM `. + + - Percent + + * - :doc:`vL1D ` cache bandwidth + + - The number of bytes looked up in the vL1D cache as a result of + :ref:`VMEM ` instructions per unit time. The number of bytes + is calculated as the number of cache lines requested multiplied by the + cache line size. This value does not consider partial requests, so e.g., + if only a single value is requested in a cache line, the data movement + will still be counted as a full cache line. This is also presented as a + percent of the peak theoretical bandwidth achievable on the specific + accelerator. + + - GB/s + + * - :doc:`L2 ` cache hit rate + + - The ratio of the number of L2 cache line requests that hit in the L2 + cache over the total number of incoming cache line requests to the L2 + cache. + + - Percent + + * - :doc:`L2 ` cache bandwidth + + - The number of bytes looked up in the L2 cache per unit time. The number + of bytes is calculated as the number of cache lines requested multiplied + by the cache line size. This value does not consider partial requests, so + e.g., if only a single value is requested in a cache line, the data + movement will still be counted as a full cache line. This is also + presented as a percent of the peak theoretical bandwidth achievable on + the specific accelerator. + + - GB/s + + * - :doc:`L2 `-fabric read BW + + - The number of bytes read by the L2 over the + :ref:`Infinity Fabric™ interface ` per unit time. This is also + presented as a percent of the peak theoretical bandwidth achievable on + the specific accelerator. 
+ + - GB/s + + * - :doc:`L2 `-fabric write and atomic BW + + - The number of bytes sent by the L2 over the + :ref:`Infinity Fabric interface ` by write and atomic + operations per unit time. This is also presented as a percent of the peak + theoretical bandwidth achievable on the specific accelerator. + + - GB/s + + * - :doc:`L2 `-fabric read latency + + - The time-averaged number of cycles read requests spent in Infinity Fabric + before data was returned to the L2. + + - Cycles + + * - :doc:`L2 `-fabric write latency + + - The time-averaged number of cycles write requests spent in Infinity + Fabric before a completion acknowledgement was returned to the L2. + + - Cycles + + * - :ref:`sL1D ` cache hit rate + + - The percent of sL1D requests that hit on a previously loaded line in the + cache. Calculated as the ratio of the number of sL1D requests that hit + over the number of all sL1D requests. + + - Percent + + * - :ref:`sL1D ` bandwidth + + - The number of bytes looked up in the sL1D cache per unit time. This is + also presented as a percent of the peak theoretical bandwidth achievable + on the specific accelerator. + + - GB/s + + * - :ref:`L1I ` bandwidth + + - The number of bytes looked up in the L1I cache per unit time. This is + also presented as a percent of the peak theoretical bandwidth achievable + on the specific accelerator. + + - GB/s + + * - :ref:`L1I ` cache hit rate + + - The percent of L1I requests that hit on a previously loaded line in the + cache. Calculated as the ratio of the number of L1I requests that hit + over the number of all L1I requests. + + - Percent + + * - :ref:`L1I ` fetch latency + + - The average number of cycles spent to fetch instructions to a + :doc:`CU `.
+ + - Cycles diff --git a/projects/rocprofiler-compute/docs/conceptual/vector-l1-cache.rst b/projects/rocprofiler-compute/docs/conceptual/vector-l1-cache.rst new file mode 100644 index 0000000000..f845b791c8 --- /dev/null +++ b/projects/rocprofiler-compute/docs/conceptual/vector-l1-cache.rst @@ -0,0 +1,766 @@ +.. meta:: + :description: ROCm Compute Profiler performance model: Vector L1 cache (vL1D) + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, vector, l1, cache, vl1d + +********************** +Vector L1 cache (vL1D) +********************** + +The vector L1 data (vL1D) cache is local to each +:doc:`compute unit ` on the accelerator, and handles vector memory +operations issued by a wavefront. The vL1D cache consists of several components: + +* An address processing unit, also known as the + :ref:`texture addresser ` which receives commands (instructions) and + write/atomic data from the :doc:`compute unit `, and coalesces + them into fewer requests for the cache to process. + +* An address translation unit, also known as the + :ref:`L1 Unified Translation Cache (UTCL1) `, that translates + requests from virtual to physical addresses for lookup in the cache. The + translation unit has an L1 translation lookaside buffer (L1TLB) to reduce the + cost of repeated translations. + +* A Tag RAM that looks up whether a requested cache line is already + present in the :ref:`cache `. + +* The result of the Tag RAM lookup is placed in the L1 cache controller + for routing to the correct location; for instance, the + :ref:`L2 Memory Interface ` for misses or the + :ref:`cache RAM ` for hits. + +* The cache RAM, also known as the :ref:`texture cache (TC) `, stores + requested data for potential reuse. Data returned from the + :doc:`L2 cache ` is placed into the cache RAM before going down the + :ref:`data-return path `. 
+ +* A backend data processing unit, also known as the + :ref:`texture data (TD) ` that routes data back to the requesting + :doc:`compute unit `. + +Together, this complex is known as the vL1D, or Texture Cache per Pipe +(TCP). A simplified diagram of the vL1D is presented below: + +.. figure:: ../data/performance-model/l1perf_model.png + :align: center + :alt: Performance model of the vL1D Cache on AMD Instinct + :width: 800 + + Performance model of the vL1D Cache on AMD Instinct MI-series accelerators. + +.. _vl1d-sol: + +vL1D Speed-of-Light +=================== + +.. warning:: + + The theoretical maximum throughput for some metrics in this section are + currently computed with the maximum achievable clock frequency, as reported + by ``rocminfo``, for an accelerator. This may not be realistic for all + workloads. + +The vL1D’s speed-of-light chart shows several key metrics for the vL1D +as a comparison with the peak achievable values of those metrics. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Hit Rate + + - The ratio of the number of vL1D cache line requests that hit [#vl1d-hit]_ + in vL1D cache over the total number of cache line requests to the + :ref:`vL1D Cache RAM `. + + - Percent + + * - Bandwidth + + - The number of bytes looked up in the vL1D cache as a result of + :ref:`VMEM ` instructions, as a percent of the peak + theoretical bandwidth achievable on the specific accelerator. The number + of bytes is calculated as the number of cache lines requested multiplied + by the cache line size. This value does not consider partial requests, so + for instance, if only a single value is requested in a cache line, the + data movement will still be counted as a full cache line. + + - Percent + + * - Utilization + + - Indicates how busy the :ref:`vL1D Cache RAM ` was during the + kernel execution. 
The number of cycles where the vL1D Cache RAM is + actively processing any request divided by the number of cycles where the + vL1D is active [#vl1d-activity]_. + + - Percent + + * - Coalescing + + - Indicates how well memory instructions were coalesced by the + :ref:`address processing unit `, ranging from uncoalesced (25%) + to fully coalesced (100%). Calculated as the average number of + :ref:`thread-requests ` generated per instruction + divided by the ideal number of thread-requests per instruction. + + - Percent + +.. _desc-ta: + +Address processing unit or Texture Addresser (TA) +================================================= + +The :doc:`vL1D `’s address processing unit receives vector +memory instructions (commands) along with write/atomic data from a +:doc:`compute unit ` and is responsible for coalescing these into +requests for lookup in the :ref:`vL1D RAM `. The address processor +passes information about the commands (coalescing state, destination SIMD, +etc.) to the :ref:`data processing unit ` for use after the requested +data has been retrieved. + +ROCm Compute Profiler reports several metrics to indicate performance bottlenecks in +the address processing unit, which are broken down into a few +categories: + +- :ref:`ta-busy-stall` + +- :ref:`ta-instruction-counts` + +- :ref:`ta-spill-stack` + +.. _ta-busy-stall: + +Busy / stall metrics +-------------------- + +When executing vector memory instructions, the compute unit must send an +address (and in the case of writes/atomics, data) to the address +processing unit. When the front-end cannot accept any more addresses, it +must backpressure the wave-issue logic for the VMEM pipe and prevent the +issue of further vector memory instructions. + +.. 
list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Busy + + - Percent of the :ref:`total CU cycles ` the address + processor was busy + + - Percent + + * - Address Stall + + - Percent of the :ref:`total CU cycles ` the address + processor was stalled from sending address requests further into the vL1D + pipeline + + - Percent + + * - Data Stall + + - Percent of the :ref:`total CU cycles ` the address + processor was stalled from sending write/atomic data further into the + vL1D pipeline + + - Percent + + * - Data-Processor → Address Stall + + - Percent of :ref:`total CU cycles ` the address processor + was stalled waiting to send command data to the + :ref:`data processor ` + + - Percent + +.. _ta-instruction-counts: + +Instruction counts +------------------ + +The address processor also counts instruction types to give the user +information on what sorts of memory instructions were executed by the +kernel. These are broken down into a few major categories: + +.. list-table:: + :header-rows: 1 + + * - Memory type + + - Usage + + - Description + + * - Global + + - Global memory + + - Global memory can be seen by all threads from a process. This includes + the local accelerator's DRAM, remote accelerator's DRAM, and the host's + DRAM. + + * - Generic + + - Dynamic address spaces + + - Generic memory, or "flat" memory, is used when the compiler cannot + statically prove that a pointer is to memory in one or the other address + spaces. The pointer could dynamically point into global, local, constant, + or private memory. + + * - Private Memory + + - Register spills / Stack memory + + - Private memory, or "scratch" memory, is only visible to a particular + :ref:`work-item ` in a particular + :ref:`workgroup `. On AMD Instinct™ MI-series + accelerators, private memory is used to implement both register spills + and stack memory accesses. + +The address processor counts these instruction types as follows: + +.. 
list-table:: + :header-rows: 1 + + * - Type + + - Description + + - Unit + + * - Global/Generic + + - The total number of global & generic memory instructions executed on all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Global/Generic Read + + - The total number of global & generic memory read instructions executed on + all :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Global/Generic Write + + - The total number of global & generic memory write instructions executed + on all :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Global/Generic Atomic + + - The total number of global & generic memory atomic (with and without + return) instructions executed on all :doc:`compute units ` + on the accelerator, per :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Spill/Stack + + - The total number of spill/stack memory instructions executed on all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Spill/Stack Read + + - The total number of spill/stack memory read instructions executed on all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Spill/Stack Write + + - The total number of spill/stack memory write instructions executed on all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. + + - Instructions per :ref:`normalization unit ` + + * - Spill/Stack Atomic + + - The total number of spill/stack memory atomic (with and without return) + instructions executed on all :doc:`compute units ` on the + accelerator, per :ref:`normalization unit `.
+ Typically unused as these memory operations are typically used to + implement thread-local storage. + + - Instructions per :ref:`normalization unit ` + +.. note:: + + The above is a simplified model specifically for the HIP programming language + that does not consider inline assembly usage, constant memory usage or + texture memory. + + These categories correspond to: + + * Global/Generic: global and flat memory operations, that are used for global + and generic memory access. + + * Spill/Stack: buffer instructions which are used on the MI50, MI100, and + :ref:`MI2XX ` accelerators for register spills / stack memory. + + These concepts are described in more detail in the :ref:`memory-spaces`, + while generic memory access is explored in the + :ref:`generic memory benchmark ` section. + +.. _ta-spill-stack: + +Spill / stack metrics +--------------------- + +Finally, the address processing unit contains a separate coalescing +stage for spill/stack memory, and thus reports: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Spill/Stack Total Cycles + + - The number of cycles the address processing unit spent working on + spill/stack instructions, per + :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + + * - Spill/Stack Coalesced Read Cycles + + - The number of cycles the address processing unit spent working on + coalesced spill/stack read instructions, per + :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + + * - Spill/Stack Coalesced Write Cycles + + - The number of cycles the address processing unit spent working on + coalesced spill/stack write instructions, per + :ref:`normalization unit `. + + - Cycles per :ref:`normalization unit ` + +.. 
_desc-utcl1: + +L1 Unified Translation Cache (UTCL1) +==================================== + +After a vector memory instruction has been processed/coalesced by the +address processing unit of the vL1D, it must be translated from a +virtual to physical address. This process is handled by the L1 Unified +Translation Cache (UTCL1). This cache contains a L1 Translation +Lookaside Buffer (TLB) which stores recently translated addresses to +reduce the cost of subsequent re-translations. + +ROCm Compute Profiler reports the following L1 TLB metrics: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Requests + + - The number of translation requests made to the UTCL1 per + :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Hits + + - The number of translation requests that hit in the UTCL1, and could be + reused, per :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Hit Ratio + + - The ratio of the number of translation requests that hit in the UTCL1 + divided by the total number of translation requests made to the UTCL1. + + - Percent + + * - Translation Misses + + - The total number of translation requests that missed in the UTCL1 due to + translation not being present in the cache, per + :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - Permission Misses + + - The total number of translation requests that missed in the UTCL1 due to + a permission error, per :ref:`normalization unit `. + This is unused and expected to be zero in most configurations for modern + CDNA™ accelerators. + + - Requests per :ref:`normalization unit ` + +.. note:: + + On current CDNA accelerators, such as the :ref:`MI2XX `, the + UTCL1 does *not* count hit-on-miss requests. + +.. 
_desc-tc: + +Vector L1 Cache RAM or Texture Cache (TC) +========================================= + +After coalescing in the :ref:`address processing unit ` of the vL1D, +and address translation in the :ref:`L1 TLB `, the request proceeds +to the Cache RAM stage of the pipeline. Incoming requests are looked up +in the cache RAMs using parts of the physical address as a tag. Hits +will be returned through the :ref:`data-return path `, while misses +will be routed out to the :doc:`L2 Cache ` for servicing. + +The metrics tracked by the vL1D RAM include: + +- :ref:`Stall metrics ` + +- :ref:`Cache access metrics ` + +- :ref:`vL1D-L2 transaction detail metrics ` + +.. _vl1d-cache-stall-metrics: + +vL1D cache stall metrics +------------------------ + +The vL1D also reports where it is stalled in the pipeline, which may +indicate performance limiters of the cache. A stall in the pipeline may +result in backpressuring earlier parts of the pipeline, e.g., a stall on +L2 requests may backpressure the wave-issue logic of the :ref:`VMEM ` +pipe and prevent it from issuing more vector memory instructions until +the vL1D’s outstanding requests are completed. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Stalled on L2 Data + + - The ratio of the number of cycles where the vL1D is stalled waiting for + requested data to return from the :doc:`L2 cache ` divided by + the number of cycles where the vL1D is active [#vl1d-activity]_. + + - Percent + + * - Stalled on L2 Requests + + - The ratio of the number of cycles where the vL1D is stalled waiting to + issue a request for data to the :doc:`L2 cache ` divided by the + number of cycles where the vL1D is active [#vl1d-activity]_.
+ + - Percent + + * - Tag RAM Stall (Read/Write/Atomic) + + - The ratio of the number of cycles where the vL1D is stalled due to + Read/Write/Atomic requests with conflicting tags being looked up + concurrently, divided by the number of cycles where the + vL1D is active [#vl1d-activity]_. + + - Percent + +.. _vl1d-cache-access-metrics: + +vL1D cache access metrics +------------------------- + +The vL1D cache access metrics broadly indicate the type of requests +incoming from the :ref:`cache front-end `, the number of requests that +were serviced by the vL1D, and the number & type of outgoing requests to +the :doc:`L2 cache `. In addition, this section includes the +approximate latencies of accesses to the cache itself, along with +latencies of read/write memory operations to the :doc:`L2 cache `. + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Total Requests + + - The total number of incoming requests from the + :ref:`address processing unit ` after coalescing. + + - Requests + + * - Total read/write/atomic requests + + - The total number of incoming read/write/atomic requests from the + :ref:`address processing unit ` after coalescing per + :ref:`normalization unit ` + + - Requests per :ref:`normalization unit ` + + * - Cache Bandwidth + + - The number of bytes looked up in the vL1D cache as a result of + :ref:`VMEM ` instructions per + :ref:`normalization unit `. The number of bytes is + calculated as the number of cache lines requested multiplied by the cache + line size. This value does not consider partial requests, so for + instance, if only a single value is requested in a cache line, the data + movement will still be counted as a full cache line. + + - Bytes per :ref:`normalization unit ` + + * - Cache Hit Rate [#vl1d-hit]_ + + - The ratio of the number of vL1D cache line requests that hit in vL1D + cache over the total number of cache line requests to the + :ref:`vL1D Cache RAM `. 
+ + - Percent + + * - Cache Accesses + + - The total number of cache line lookups in the vL1D. + + - Cache lines + + * - Cache Hits [#vl1d-hit]_ + + - The number of cache accesses minus the number of outgoing requests to the + :doc:`L2 cache `, that is, the number of cache line requests + serviced by the :ref:`vL1D Cache RAM ` per + :ref:`normalization unit `. + + - Cache lines per :ref:`normalization unit ` + + * - Invalidations + + - The number of times the vL1D was issued a write-back invalidate command + during the kernel's execution per + :ref:`normalization unit `. This may be triggered + by, for instance, the ``buffer_wbinvl1`` instruction. + + - Invalidations per :ref:`normalization unit ` + + * - L1-L2 Bandwidth + + - The number of bytes transferred across the vL1D-L2 interface as a result + of :ref:`VMEM ` instructions, per + :ref:`normalization unit `. The number of bytes is + calculated as the number of cache lines requested multiplied by the cache + line size. This value does not consider partial requests, so for + instance, if only a single value is requested in a cache line, the data + movement will still be counted as a full cache line. + + - Bytes per :ref:`normalization unit ` + + * - L1-L2 Reads + + - The number of read requests for a vL1D cache line that were not satisfied + by the vL1D and must be retrieved from the + :doc:`L2 Cache ` per + :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - L1-L2 Writes + + - The number of write requests to a vL1D cache line that were sent through + the vL1D to the :doc:`L2 cache `, per + :ref:`normalization unit `. + + - Requests per :ref:`normalization unit ` + + * - L1-L2 Atomics + + - The number of atomic requests that are sent through the vL1D to the + :doc:`L2 cache `, per + :ref:`normalization unit `. This includes requests + for atomics with, and without return. 
+ + - Requests per :ref:`normalization unit ` + + * - L1 Access Latency + + - Calculated as the average number of cycles that a vL1D cache line request + spent in the vL1D cache pipeline. + + - Cycles + + * - L1-L2 Read Access Latency + + - Calculated as the average number of cycles that the vL1D cache took to + issue and receive read requests from the :doc:`L2 Cache `. This + number also includes requests for atomics with return values. + + - Cycles + + * - L1-L2 Write Access Latency + + - Calculated as the average number of cycles that the vL1D cache took to + issue and receive acknowledgement of a write request to the + :doc:`L2 Cache `. This number also includes requests for + atomics without return values. + + - Cycles + +.. note:: + + All cache accesses in vL1D are for a single cache line's worth of data. + The size of a cache line may vary; however, on current AMD Instinct MI CDNA + accelerators and GCN™ GPUs, the L1 cache line size is 64B. + +.. rubric :: Footnotes + +.. [#vl1d-hit] The vL1D cache on AMD Instinct MI-series CDNA accelerators + uses a "hit-on-miss" approach to reporting cache hits. That is, if while + satisfying a miss, another request comes in that would hit on the same + pending cache line, the subsequent request will be counted as a "hit". + Therefore, it is also important to consider the access latency metric in the + :ref:`Cache access metrics ` section when + evaluating the vL1D hit rate. + +.. [#vl1d-activity] ROCm Compute Profiler considers the vL1D to be active when any part of + the vL1D (excluding the :ref:`address processor ` and + :ref:`data return ` units) is active, for example, when performing + a translation, waiting for data, accessing the Tag or Cache RAMs, etc. + +.. _vl1d-l2-transaction-detail: + +vL1D - L2 Transaction Detail +---------------------------- + +This section provides a more granular look at the types of requests made +to the :doc:`L2 cache `. 
These are broken down by the operation type +(read / write / atomic, with, or without return), and the +:ref:`memory type `. + +.. _desc-td: + +Vector L1 data-return path or Texture Data (TD) +=============================================== + +The data-return path of the vL1D cache, also known as the Texture Data +(TD) unit, is responsible for routing data returned from the +:ref:`vL1D cache RAM ` back to a wavefront on a SIMD. As described in +the :ref:`vL1D cache front-end ` section, the data-return path is passed +information about the space requirements and routing for data requests +from the :ref:`VALU `. When data is returned from the +:ref:`vL1D cache RAM `, it is matched to this previously stored request +data, and returned to the appropriate SIMD. + +ROCm Compute Profiler reports the following vL1D data-return path metrics: + +.. list-table:: + :header-rows: 1 + + * - Metric + + - Description + + - Unit + + * - Data-return Busy + + - Percent of the :ref:`total CU cycles ` the data-return + unit was busy processing or waiting on data to return to the + :doc:`CU `. + + - Percent + + * - Cache RAM → Data-return Stall + + - Percent of the :ref:`total CU cycles ` the data-return + unit was stalled on data to be returned from the + :ref:`vL1D Cache RAM `. + + - Percent + + * - Workgroup manager → Data-return Stall + + - Percent of the :ref:`total CU cycles ` the data-return + unit was stalled by the :ref:`workgroup manager ` due to + initialization of registers as a part of launching new workgroups. + + - Percent + + * - Coalescable Instructions + + - The number of instructions submitted to the + :ref:`data-return unit ` by the + :ref:`address processor ` that were found to be coalescable, per + :ref:`normalization unit `. 
+ + - Instructions per :ref:`normalization unit ` + + * - Read Instructions + + - The number of read instructions submitted to the + :ref:`data-return unit ` by the + :ref:`address processor ` summed over all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. This is expected to be + the sum of global/generic and spill/stack reads in the + :ref:`address processor `. + + - Instructions per :ref:`normalization unit ` + + * - Write Instructions + + - The number of store instructions submitted to the + :ref:`data-return unit ` by the + :ref:`address processor ` summed over all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. This is expected to be + the sum of global/generic and spill/stack stores counted by the + :ref:`vL1D cache-front-end `. + + - Instructions per :ref:`normalization unit ` + + * - Atomic Instructions + + - The number of atomic instructions submitted to the + :ref:`data-return unit ` by the + :ref:`address processor ` summed over all + :doc:`compute units ` on the accelerator, per + :ref:`normalization unit `. This is expected to be + the sum of global/generic and spill/stack atomics in the + :ref:`address processor `. + + - Instructions per :ref:`normalization unit ` diff --git a/projects/rocprofiler-compute/docs/conf.py b/projects/rocprofiler-compute/docs/conf.py new file mode 100644 index 0000000000..c98dafb60e --- /dev/null +++ b/projects/rocprofiler-compute/docs/conf.py @@ -0,0 +1,98 @@ +##############################################################################bl +# MIT License +# +# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +##############################################################################el + +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +import re + +with open("../VERSION", encoding="utf-8") as f: + match = re.search(r"([0-9.]+)[^0-9.]+", f.read()) + if not match: + raise ValueError("VERSION not found!") + version_number = match[1] + +# project info +project = "ROCm Compute Profiler" +author = "Advanced Micro Devices, Inc." +copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved." 
+version = version_number +release = version_number + +extensions = ["rocm_docs", "sphinx.ext.extlinks", "sphinxcontrib.datatemplates"] +html_theme = "rocm_docs_theme" +html_theme_options = {"flavor": "rocm"} +html_title = f"{project} {version_number} documentation" +exclude_patterns = ["archive", "*/includes"] + +html_static_path = ["sphinx/static/css"] +html_css_files = ["o_custom.css"] + +external_toc_path = "./sphinx/_toc.yml" +external_projects_current_project = "rocprofiler-compute" + +# frequently used external resources +extlinks = { + "dev-sample": ( + "https://github.com/ROCm/rocprofiler-compute/blob/amd-mainline/sample/%s", + "%s", + ), + "prod-page": ( + "https://www.amd.com/en/products/accelerators/instinct/%s.html", + "%s", + ), + "llvm-docs": ("https://llvm.org/docs/AMDGPUUsage.html#%s", "%s"), + "amd-lab-note": ("https://gpuopen.com/learn/amd-lab-notes/%s", "%s"), + "cdna2-white-paper": ( + "https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf#page=%s", + "CDNA2 white paper (page %s)", + ), + "gcn-crash-course": ( + "https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah#%s", + "The AMD GCN Architecture - A Crash Course (slide %s)", + ), + "hip-training-pdf": ( + "https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf#page=%s", + "Introduction to AMD GPU Programming with HIP (slide %s)", + ), + "mantor-gcn-pdf": ( + "https://old.hotchips.org/wp-content/uploads/hc_archives/hc24/HC24-3-ManyCore/HC24.28.315-AMD.GCN.mantor_v1.pdf#page=%s", + "AMD Radeon HD7970 with GCN Architecture (slide %s)", + ), + "mantor-vega10-pdf": ( + "https://old.hotchips.org/wp-content/uploads/hc_archives/hc29/HC29.21-Monday-Pub/HC29.21.10-GPU-Gaming-Pub/HC29.21.120-Radeon-Vega10-Mantor-AMD-f1.pdf#page=%s", + "AMD Radeon Next Generation GPU Architecture - Vega10 (slide %s)", + ), + "mi200-isa-pdf": ( + 
"https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf#page=%s", + "AMD Instinct MI200 ISA Reference Guide (page %s)", + ), + "hsa-runtime-pdf": ( + "http://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf#page=%s", + "HSA Runtime Programmer's Reference Manual (page %s)", + ), +} diff --git a/projects/rocprofiler-compute/docs/data/analyze/cli/mem_chart.png b/projects/rocprofiler-compute/docs/data/analyze/cli/mem_chart.png new file mode 100644 index 0000000000..b372b6d3ca Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/cli/mem_chart.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/cli/roofline_chart.png b/projects/rocprofiler-compute/docs/data/analyze/cli/roofline_chart.png new file mode 100644 index 0000000000..c0b0883091 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/cli/roofline_chart.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/cli/system_speed_of_light.png b/projects/rocprofiler-compute/docs/data/analyze/cli/system_speed_of_light.png new file mode 100644 index 0000000000..ead1c873d9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/cli/system_speed_of_light.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/global_variables.png b/projects/rocprofiler-compute/docs/data/analyze/global_variables.png new file mode 100644 index 0000000000..87f49b5e14 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/global_variables.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/Current_and_baseline_dispatch_ids.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/Current_and_baseline_dispatch_ids.png new file mode 100644 index 0000000000..811bf99692 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/Current_and_baseline_dispatch_ids.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/analyze/grafana/Kernel_time_histogram.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/Kernel_time_histogram.png new file mode 100644 index 0000000000..8ec0fd83ba Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/Kernel_time_histogram.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/Top_bottleneck_dispatches.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/Top_bottleneck_dispatches.png new file mode 100644 index 0000000000..31d13a0a2f Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/Top_bottleneck_dispatches.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cpc_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpc_panel.png new file mode 100644 index 0000000000..7b7f758588 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpc_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cpf_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpf_panel.png new file mode 100644 index 0000000000..a43b878536 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpf_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-arith-ops_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-arith-ops_panel.png new file mode 100644 index 0000000000..073b64d707 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-arith-ops_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-inst-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-inst-mix_panel.png new file mode 100644 index 0000000000..1b9a6d2b25 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-inst-mix_panel.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png new file mode 100644 index 0000000000..d74dfd271a Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-pipeline-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-pipeline-stats_panel.png new file mode 100644 index 0000000000..6f572f9148 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-pipeline-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-sol_panel.png new file mode 100644 index 0000000000..8e8f46174f Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-value-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-value-arith-instr-mix_panel.png new file mode 100644 index 0000000000..de3750d2d0 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-value-arith-instr-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-vmem-instr-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-vmem-instr-mix_panel.png new file mode 100644 index 0000000000..1d6ce1bc46 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-vmem-instr-mix_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-accesses_panel.png new file mode 100644 index 0000000000..926a7805e7 Binary files 
/dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-sol_panel.png new file mode 100644 index 0000000000..64be7178c6 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-accesses_panel.png new file mode 100644 index 0000000000..101cf77530 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-interface-stalls_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-interface-stalls_panel.png new file mode 100644 index 0000000000..b1bd415ca3 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-interface-stalls_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-transactions_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-transactions_panel.png new file mode 100644 index 0000000000..7df5a78095 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-transactions_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-per-channel-agg-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-per-channel-agg-stats_panel.png new file mode 100644 index 0000000000..704d45c69f Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-per-channel-agg-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-sol_panel.png 
b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-sol_panel.png new file mode 100644 index 0000000000..646e608cbc Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-sol_panel.png new file mode 100644 index 0000000000..c261513aa9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-stats_panel.png new file mode 100644 index 0000000000..0d9d419eb7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/memory-chart_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/memory-chart_panel.png new file mode 100644 index 0000000000..1091a50329 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/memory-chart_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/roofline_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/roofline_panel.png new file mode 100644 index 0000000000..47ee9bddb1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/roofline_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-cache-accesses_panel.png new file mode 100644 index 0000000000..3605cce8a2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-cache-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-l12-interface_panel.png 
b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-l12-interface_panel.png new file mode 100644 index 0000000000..5c3480ac9f Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-l12-interface_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-sol_panel.png new file mode 100644 index 0000000000..92fa5a1a4a Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sol_panel.png new file mode 100644 index 0000000000..f456500e02 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-resource-allocation_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-resource-allocation_panel.png new file mode 100644 index 0000000000..bee869ad10 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-resource-allocation_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-stats_panel.png new file mode 100644 index 0000000000..19c7ad3645 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/system-info_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/system-info_panel.png new file mode 100644 index 0000000000..5a5fa01187 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/system-info_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/ta_panel.png 
b/projects/rocprofiler-compute/docs/data/analyze/grafana/ta_panel.png new file mode 100644 index 0000000000..2f08f9a6b1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/ta_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/td_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/td_panel.png new file mode 100644 index 0000000000..819407515b Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/td_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/top-stat_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/top-stat_panel.png new file mode 100644 index 0000000000..5e3dddca2f Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/top-stat_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-addr-translation_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-addr-translation_panel.png new file mode 100644 index 0000000000..0fb4aaf076 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-addr-translation_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-accesses_panel.png new file mode 100644 index 0000000000..5259b2214f Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-accesses_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-stalls_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-stalls_panel.png new file mode 100644 index 0000000000..61e09c915c Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-stalls_panel.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-l2-transactions_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-l2-transactions_panel.png new file mode 100644 index 0000000000..51875e516c Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-l2-transactions_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-sol_panel.png new file mode 100644 index 0000000000..5c2485d0d7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-sol_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-launch-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-launch-stats_panel.png new file mode 100644 index 0000000000..38e4517f33 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-launch-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-runtime-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-runtime-stats_panel.png new file mode 100644 index 0000000000..517d461d31 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-runtime-stats_panel.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/standalone_gui.png b/projects/rocprofiler-compute/docs/data/analyze/standalone_gui.png new file mode 100644 index 0000000000..a8abd81694 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/standalone_gui.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/tui.png b/projects/rocprofiler-compute/docs/data/analyze/tui.png new file mode 100644 index 0000000000..60f7c2b6f0 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/tui.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/conceptual/CDNA.png b/projects/rocprofiler-compute/docs/data/conceptual/CDNA.png new file mode 100644 index 0000000000..b3afa30c74 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/conceptual/CDNA.png differ diff --git a/projects/rocprofiler-compute/docs/data/conceptual/CDNA2.png b/projects/rocprofiler-compute/docs/data/conceptual/CDNA2.png new file mode 100644 index 0000000000..a53d91cd37 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/conceptual/CDNA2.png differ diff --git a/projects/rocprofiler-compute/docs/data/conceptual/CDNA3.png b/projects/rocprofiler-compute/docs/data/conceptual/CDNA3.png new file mode 100644 index 0000000000..4a6b84c0f5 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/conceptual/CDNA3.png differ diff --git a/projects/rocprofiler-compute/docs/data/conceptual/CDNA4.png b/projects/rocprofiler-compute/docs/data/conceptual/CDNA4.png new file mode 100644 index 0000000000..d84372f461 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/conceptual/CDNA4.png differ diff --git a/projects/rocprofiler-compute/docs/data/contributing/pre-commit-hook.png b/projects/rocprofiler-compute/docs/data/contributing/pre-commit-hook.png new file mode 100644 index 0000000000..d7af49d978 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/contributing/pre-commit-hook.png differ diff --git a/projects/rocprofiler-compute/docs/data/faq/tunnel_demo1.png b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo1.png new file mode 100644 index 0000000000..bda64883c4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo1.png differ diff --git a/projects/rocprofiler-compute/docs/data/faq/tunnel_demo2.png b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo2.png new file mode 100644 index 0000000000..8b2d258521 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo2.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/faq/tunnel_demo3.png b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo3.png new file mode 100644 index 0000000000..76cd7ed9a9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo3.png differ diff --git a/projects/rocprofiler-compute/docs/data/install/datasource_config.jpg b/projects/rocprofiler-compute/docs/data/install/datasource_config.jpg new file mode 100644 index 0000000000..4210d9036b Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/datasource_config.jpg differ diff --git a/projects/rocprofiler-compute/docs/data/install/datasource_settings.jpg b/projects/rocprofiler-compute/docs/data/install/datasource_settings.jpg new file mode 100644 index 0000000000..f472362544 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/datasource_settings.jpg differ diff --git a/projects/rocprofiler-compute/docs/data/install/grafana_welcome.png b/projects/rocprofiler-compute/docs/data/install/grafana_welcome.png new file mode 100644 index 0000000000..e564c0a389 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/grafana_welcome.png differ diff --git a/projects/rocprofiler-compute/docs/data/install/grafana_workload_selection.png b/projects/rocprofiler-compute/docs/data/install/grafana_workload_selection.png new file mode 100644 index 0000000000..3ecdc35e72 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/grafana_workload_selection.png differ diff --git a/projects/rocprofiler-compute/docs/data/install/import_dashboard.png b/projects/rocprofiler-compute/docs/data/install/import_dashboard.png new file mode 100644 index 0000000000..29be7ea584 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/import_dashboard.png differ diff --git a/projects/rocprofiler-compute/docs/data/install/install-decision-tree.png b/projects/rocprofiler-compute/docs/data/install/install-decision-tree.png new 
file mode 100644 index 0000000000..6fe99b01b9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/install-decision-tree.png differ diff --git a/projects/rocprofiler-compute/docs/data/install/omniperf_server_vs_client_install.png b/projects/rocprofiler-compute/docs/data/install/omniperf_server_vs_client_install.png new file mode 100644 index 0000000000..8c43dba9e2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/omniperf_server_vs_client_install.png differ diff --git a/projects/rocprofiler-compute/docs/data/install/opening_dashboard.png b/projects/rocprofiler-compute/docs/data/install/opening_dashboard.png new file mode 100644 index 0000000000..5e6c7ea625 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/opening_dashboard.png differ diff --git a/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_host_trap_single_kernel.png b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_host_trap_single_kernel.png new file mode 100644 index 0000000000..d39158938c Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_host_trap_single_kernel.png differ diff --git a/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_no_kernel_filtering.png b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_no_kernel_filtering.png new file mode 100644 index 0000000000..50b5b5f253 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_no_kernel_filtering.png differ diff --git a/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_sort_by_count.png b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_sort_by_count.png new file mode 100644 index 0000000000..b7e6cdf3c6 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_sort_by_count.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_stochastic_single_kernel.png b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_stochastic_single_kernel.png new file mode 100644 index 0000000000..09b9d631b5 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/pc_sampling/pc_sampling_stochastic_single_kernel.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/fabric.png b/projects/rocprofiler-compute/docs/data/performance-model/fabric.png new file mode 100644 index 0000000000..826b4d9de7 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/fabric.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/fabric.svg b/projects/rocprofiler-compute/docs/data/performance-model/fabric.svg new file mode 100644 index 0000000000..1c98d20810 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/performance-model/fabric.svg @@ -0,0 +1,899 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Total Fabric Requests + + + + + 32B Read Requests + + + + 64B Read Requests + + + + 32B Write Requests + + + + + + 64B Write Requests + + + + + + Uncached Read Requests + + + x2 + + + + Uncached Write Requests + + + + + + Atomic +Requests + + + + + + HBM Read +Requests + + + + + Remote Read +Requests + + + + + + + + + + + + + + + + + + + HBM Write Requests + + + + Remote Write Requests + + + + diff --git a/projects/rocprofiler-compute/docs/data/performance-model/gcn_compute_unit.png b/projects/rocprofiler-compute/docs/data/performance-model/gcn_compute_unit.png new file mode 100644 index 0000000000..e6c1f2eb07 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/gcn_compute_unit.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.png 
b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.png new file mode 100644 index 0000000000..fdabfbb955 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.svg b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.svg new file mode 100644 index 0000000000..dd22a71319 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.svg @@ -0,0 +1,584 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Compute Unit + Cmd/Data + + + + Address Processing Unit + + + Sync + Data Processing Unit + + Virtual To Physical Address Translation + + Tag RAM + + L1 Cache Controller + + CacheRAM + + L2 Memory Interface + Data + + Bus + + L2 Cache + + + + diff --git a/projects/rocprofiler-compute/docs/data/performance-model/l2perf_model.png b/projects/rocprofiler-compute/docs/data/performance-model/l2perf_model.png new file mode 100644 index 0000000000..49489369a5 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/l2perf_model.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/lds.png b/projects/rocprofiler-compute/docs/data/performance-model/lds.png new file mode 100644 index 0000000000..f444eaf539 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/lds.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/lds.svg b/projects/rocprofiler-compute/docs/data/performance-model/lds.svg new file mode 100644 index 0000000000..c0adb5e912 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/performance-model/lds.svg @@ -0,0 +1,393 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SIMD 0/1 + SIMD 2/3 + + + + + + Conflict Detection + + + + Scheduler + + + + Bank 0 + + + + Bank 1 + + + 
+ Bank 2 + + + + Bank 3 + + + + Bank 31 + + ... + + diff --git a/projects/rocprofiler-compute/docs/data/performance-model/nosplit.png b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.png new file mode 100644 index 0000000000..a8e5f01649 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/nosplit.svg b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.svg new file mode 100644 index 0000000000..d0d9606be5 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/data/performance-model/selayout.png b/projects/rocprofiler-compute/docs/data/performance-model/selayout.png new file mode 100644 index 0000000000..73aa2b49de Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/selayout.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/split.png b/projects/rocprofiler-compute/docs/data/performance-model/split.png new file mode 100644 index 0000000000..cca71eb2a4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/split.png differ diff --git a/projects/rocprofiler-compute/docs/data/performance-model/split.svg b/projects/rocprofiler-compute/docs/data/performance-model/split.svg new file mode 100644 index 0000000000..b033a9e111 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/performance-model/split.svg @@ -0,0 +1,64 @@ + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/data/performance-model/uncached.png b/projects/rocprofiler-compute/docs/data/performance-model/uncached.png new file mode 100644 index 0000000000..f770a1b291 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/uncached.png differ diff --git 
a/projects/rocprofiler-compute/docs/data/performance-model/uncached.svg b/projects/rocprofiler-compute/docs/data/performance-model/uncached.svg new file mode 100644 index 0000000000..53affd4fc6 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/performance-model/uncached.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + x2 + + diff --git a/projects/rocprofiler-compute/docs/data/profile/sample-roof-plot.jpg b/projects/rocprofiler-compute/docs/data/profile/sample-roof-plot.jpg new file mode 100644 index 0000000000..2deaba7ad2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profile/sample-roof-plot.jpg differ diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.png b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.png new file mode 100644 index 0000000000..bd74d62499 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.png differ diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.svg b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.svg new file mode 100644 index 0000000000..a22a7b84d7 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.svg @@ -0,0 +1,1579 @@ + + + + + + + + 2023-08-21T11:00:20.650499 + image/svg+xml + + + Matplotlib v3.7.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.png b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.png new file mode 100644 index 0000000000..ab057f3cd9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.png differ diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.svg b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.svg new file mode 100644 index 0000000000..c86b4125e4 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.svg @@ -0,0 +1,1050 @@ + + + + + + + + 2023-08-21T11:43:04.336525 + image/svg+xml + + + Matplotlib v3.7.1, 
https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.png b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.png new file mode 100644 index 0000000000..77c0938581 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.png differ diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.svg b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.svg new file mode 100644 index 0000000000..147da6aa42 --- /dev/null +++ b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.svg @@ -0,0 +1,1145 @@ + + + + + + + + 2023-08-17T18:14:36.907658 + image/svg+xml + + + Matplotlib v3.7.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rocprofiler-compute/docs/data/unused/L1_l2_transactions_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L1_l2_transactions_per_channel.png new file mode 100644 index 0000000000..7b839ab0fe Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L1_l2_transactions_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_latencies_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_latencies_per_channel.png new file mode 100644 index 0000000000..a0b3471974 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_latencies_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_stalls_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_stalls_per_channel.png new file mode 100644 index 0000000000..ac1c5dffb1 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_stalls_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_stalls_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_stalls_per_channel.png new file mode 100644 index 0000000000..d5a1c2c072 Binary files /dev/null and 
b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_stalls_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_starvation_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_starvation_per_channel.png new file mode 100644 index 0000000000..49d584621d Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_starvation_per_channel.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/Memory_latencies.png b/projects/rocprofiler-compute/docs/data/unused/Memory_latencies.png new file mode 100644 index 0000000000..3b97d72e0d Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/Memory_latencies.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/Roofline_analysis.png b/projects/rocprofiler-compute/docs/data/unused/Roofline_analysis.png new file mode 100644 index 0000000000..36efd2ea77 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/Roofline_analysis.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/Top_bottleneck_kernels.png b/projects/rocprofiler-compute/docs/data/unused/Top_bottleneck_kernels.png new file mode 100644 index 0000000000..17b8ef7da2 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/Top_bottleneck_kernels.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/fig_level_counter.png b/projects/rocprofiler-compute/docs/data/unused/fig_level_counter.png new file mode 100644 index 0000000000..fa50539a0c Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/fig_level_counter.png differ diff --git a/projects/rocprofiler-compute/docs/data/unused/install-decision-tree.png b/projects/rocprofiler-compute/docs/data/unused/install-decision-tree.png new file mode 100644 index 0000000000..1c62fba87b Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/install-decision-tree.png differ diff 
--git a/projects/rocprofiler-compute/docs/data/unused/omniperf_architecture.png b/projects/rocprofiler-compute/docs/data/unused/omniperf_architecture.png new file mode 100644 index 0000000000..966ac2d608 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/omniperf_architecture.png differ diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst new file mode 100644 index 0000000000..d923b0426d --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -0,0 +1,317 @@ +.. meta:: + :description: ROCm Compute Profiler analysis: CLI analysis + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, command line, analyze, filtering, metrics, baseline, comparison + +************ +CLI analysis +************ + +This section provides an overview of ROCm Compute Profiler's CLI analysis features. + +* :ref:`Derived metrics `: All of ROCm Compute Profiler's built-in metrics. + +* :ref:`Baseline comparison `: Compare multiple + runs in a side-by-side manner. + +* :ref:`Metric customization `: Isolate a subset of + built-in metrics or build your own profiling configuration. + +* :ref:`Filtering `: Hone in on a particular kernel, + GPU ID, or dispatch ID via post-process filtering. + +Run ``rocprof-compute analyze -h`` for more details. + +.. _cli-walkthrough: + +Walkthrough +=========== + +1. To begin, generate a high-level analysis report using ROCm Compute Profiler's ``-b`` (or ``--block``) flag. + +There are three high-level GPU analysis views: + +* System Speed-of-Light: Key GPU performance metrics to show overall GPU performance and utilization. +* Memory chart: Shows memory transactions and throughput on each cache hierarchical level. +* Empirical hierarchical roofline: Roofline model that compares achieved throughput with attainable peak hardware limits, more specifically peak compute throughput and memory bandwidth (on L1/LDS/L2/HBM). 
+ +**System Speed-of-Light:** + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -b 2 + +.. image:: ../../data/analyze/cli/system_speed_of_light.png + :align: left + :alt: System Speed Of Light + +**Memory chart:** + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -b 3 + +.. image:: ../../data/analyze/cli/mem_chart.png + :align: left + :alt: Memory Chart + +**Empirical hierarchical roofline:** + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -b 4 + +.. image:: ../../data/analyze/cli/roofline_chart.png + :align: left + :alt: Roofline + +.. note:: + * Visualized memory chart and Roofline chart are only supported in single run analysis. In multiple runs comparison mode, both are switched back to basic table view. + * Visualized memory chart requires the width of the terminal output to be greater than or equal to 234 to display the whole chart properly. + * Visualized Roofline chart is adapted to the initial terminal size only. If it is not clear, you may need to adjust the terminal size and regenerate it to check the display effect. + +.. _cli-list-metrics: + +2. Use ``--list-metrics`` to generate a list of available metrics for inspection. + + .. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + Analysis mode = cli + [analysis] deriving rocprofiler-compute metrics... 
+ 0 -> Top Stats + 1 -> System Info + 2 -> System Speed-of-Light + 2.1 -> Speed-of-Light + 2.1.0 -> VALU FLOPs + 2.1.1 -> VALU IOPs + 2.1.2 -> MFMA FLOPs (BF16) + 2.1.3 -> MFMA FLOPs (F16) + 2.1.4 -> MFMA FLOPs (F32) + 2.1.5 -> MFMA FLOPs (F64) + 2.1.6 -> MFMA IOPs (Int8) + 2.1.7 -> Active CUs + 2.1.8 -> SALU Utilization + 2.1.9 -> VALU Utilization + 2.1.10 -> MFMA Utilization + 2.1.11 -> VMEM Utilization + 2.1.12 -> Branch Utilization + 2.1.13 -> VALU Active Threads + 2.1.14 -> IPC + 2.1.15 -> Wavefront Occupancy + 2.1.16 -> Theoretical LDS Bandwidth + 2.1.17 -> LDS Bank Conflicts/Access + 2.1.18 -> vL1D Cache Hit Rate + 2.1.19 -> vL1D Cache BW + 2.1.20 -> L2 Cache Hit Rate + 2.1.21 -> L2 Cache BW + 2.1.22 -> L2-Fabric Read BW + 2.1.23 -> L2-Fabric Write BW + 2.1.24 -> L2-Fabric Read Latency + 2.1.25 -> L2-Fabric Write Latency + 2.1.26 -> sL1D Cache Hit Rate + 2.1.27 -> sL1D Cache BW + 2.1.28 -> L1I Hit Rate + 2.1.29 -> L1I BW + 2.1.30 -> L1I Fetch Latency + ... + +3. Choose your own customized subset of metrics with the ``-b`` (or ``--block``) + option. Or, build your own configuration following + `config_template `_. + The following snippet shows how to generate a report containing only metric 2 + (:doc:`System Speed-of-Light `). + + .. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -b 2 + + -------- + Analyze + -------- + + -------------------------------------------------------------------------------- + 1. 
Top Stat + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20000.00 │ 20000.00 │ 20000.00 │ 100.00 │ + │ │ int) [clone .kd] │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 2. System Speed-of-Light + ╒═════════╤═══════════════════════════╤═══════════════════════╤══════════════════╤════════════════════╤════════════════════════╕ + │ Index │ Metric │ Value │ Unit │ Peak │ PoP │ + ╞═════════╪═══════════════════════════╪═══════════════════════╪══════════════════╪════════════════════╪════════════════════════╡ + │ 2.1.0 │ VALU FLOPs │ 0.0 │ Gflop │ 22630.4 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.1 │ VALU IOPs │ 367.0016 │ Giop │ 22630.4 │ 1.6217194570135745 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.2 │ MFMA FLOPs (BF16) │ 0.0 │ Gflop │ 90521.6 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.3 │ MFMA FLOPs (F16) │ 0.0 │ Gflop │ 181043.2 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.4 │ MFMA FLOPs (F32) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.5 │ MFMA FLOPs (F64) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │ + 
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.6 │ MFMA IOPs (Int8) │ 0.0 │ Giop │ 181043.2 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.7 │ Active CUs │ 74 │ Cus │ 104 │ 71.15384615384616 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.8 │ SALU Util │ 4.016057506716307 │ Pct │ 100 │ 4.016057506716307 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.9 │ VALU Util │ 5.737225009594725 │ Pct │ 100 │ 5.737225009594725 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.10 │ MFMA Util │ 0.0 │ Pct │ 100 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.11 │ VALU Active Threads/Wave │ 64.0 │ Threads │ 64 │ 100.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.12 │ IPC - Issue │ 1.0 │ Instr/cycle │ 5 │ 20.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.13 │ LDS BW │ 0.0 │ Gb/sec │ 22630.4 │ 0.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.14 │ LDS Bank Conflict │ │ Conflicts/access │ 32 │ │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.15 │ Instr Cache Hit Rate │ 99.91306912556854 │ Pct │ 100 │ 99.91306912556854 │ + 
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.16 │ Instr Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.17 │ Scalar L1D Cache Hit Rate │ 99.81986908342313 │ Pct │ 100 │ 99.81986908342313 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.18 │ Scalar L1D Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.19 │ Vector L1D Cache Hit Rate │ 50.0 │ Pct │ 100 │ 50.0 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.20 │ Vector L1D Cache BW │ 1677.7216 │ Gb/s │ 11315.199999999999 │ 14.82714932126697 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.21 │ L2 Cache Hit Rate │ 35.55067615693325 │ Pct │ 100 │ 35.55067615693325 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.22 │ L2-Fabric Read BW │ 419.8496 │ Gb/s │ 1638.4 │ 25.6255859375 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.23 │ L2-Fabric Write BW │ 293.9456 │ Gb/s │ 1638.4 │ 17.941015625 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.24 │ L2-Fabric Read Latency │ 256.6482321288385 │ Cycles │ │ │ + 
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.25 │ L2-Fabric Write Latency │ 317.2264255699014 │ Cycles │ │ │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.26 │ Wave Occupancy │ 1821.723057333852 │ Wavefronts │ 3328 │ 54.73927455931046 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.27 │ Instr Fetch BW │ 4.174722306564298e-08 │ Gb/s │ 3046.4 │ 1.3703789084047721e-09 │ + ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤ + │ 2.1.28 │ Instr Fetch Latency │ 21.729248046875 │ Cycles │ │ │ + ╘═════════╧═══════════════════════════╧═══════════════════════╧══════════════════╧════════════════════╧════════════════════════╛ + + .. note:: + + Some cells may be blank indicating a missing or unavailable hardware + counter or NULL value. + +4. Optimize the application, iterate, and re-profile to inspect performance + changes. + +5. Redo a comprehensive analysis with ROCm Compute Profiler CLI at any optimization + milestone. + +.. _cli-analysis-options: + +More analysis options +===================== + +Single run + .. code-block:: shell + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ + +List top kernels and dispatches + .. code-block:: shell + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-stats + +List metrics + .. code-block:: shell + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a + +Show System Speed-of-Light and CS_Busy blocks only + .. code-block:: shell + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -b 2 5.1.0 + +.. note:: + + You can filter a single metric or the whole hardware component by its ID. 
In + this case, ``2`` is the ID for System Speed-of-Light and ``5.1.0`` is the ID for + the GPU Busy Cycles metric. + +Filter kernels + First, list the top kernels in your application using ``--list-stats``. + + .. code-block:: + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-stats + + Analysis mode = cli + [analysis] deriving rocprofiler-compute metrics... + + -------------------------------------------------------------------------------- + Detected Kernels (sorted descending by duration) + ╒════╤══════════════════════════════════════════════╕ + │ │ Kernel_Name │ + ╞════╪══════════════════════════════════════════════╡ + │ 0 │ vecCopy(double*, double*, double*, int, int) │ + ╘════╧══════════════════════════════════════════════╛ + + -------------------------------------------------------------------------------- + Dispatch list + ╒════╤═══════════════╤══════════════════════════════════════════════╤══════════╕ + │ │ Dispatch_ID │ Kernel_Name │ GPU_ID │ + ╞════╪═══════════════╪══════════════════════════════════════════════╪══════════╡ + │ 0 │ 0 │ vecCopy(double*, double*, double*, int, int) │ 0 │ + ╘════╧═══════════════╧══════════════════════════════════════════════╧══════════╛ + + Second, select the index of the kernel you would like to filter; for example, + ``vecCopy(double*, double*, double*, int, int) [clone .kd]`` at index ``0``. + Then, use this index to apply the filter via ``-k`` or ``--kernels``. + + .. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -k 0 + + Analysis mode = cli + [analysis] deriving rocprofiler-compute metrics... + + -------------------------------------------------------------------------------- + 0.
Top Stats + 0.1 Top Kernels + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╤═════╕ + │ │ Kernel_Name │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ S │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╪═════╡ + │ 0 │ vecCopy(double*, double*, double*, int, │ 1.00 │ 18560.00 │ 18560.00 │ 18560.00 │ 100.00 │ * │ + │ │ int) │ │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╧═════╛ + ... + + You should see your filtered kernels indicated by an asterisk in the **Top + Stats** table. + + +Baseline comparison + .. code-block:: shell + + rocprof-compute analyze -p workload1/path/ -p workload2/path/ + + OR + + .. code-block:: shell + + rocprof-compute analyze -p workload1/path/ -k 0 -p workload2/path/ -k 1 diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/grafana-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/grafana-gui.rst new file mode 100644 index 0000000000..105bc0fefb --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/analyze/grafana-gui.rst @@ -0,0 +1,1076 @@ +.. meta:: + :description: ROCm Compute Profiler analysis: Grafana GUI + :keywords: Omniperf, ROCm Compute Profiler, ROCm, profiler, tool, + Instinct, accelerator, Grafana, panels, GUI, import + +******************** +Grafana GUI analysis +******************** + +.. warning:: + + Grafana and MongoDB functionality is deprecated and will be removed in a future release. + +Find setup instructions in :doc:`../../install/grafana-setup`. 
+ +The ROCm Compute Profiler Grafana analysis dashboard GUI supports the following features to +facilitate MI accelerator performance profiling and analysis: + +* System and hardware component (hardware block) + +* Speed-of-Light (SOL) + +* Multiple normalization options + +* Baseline comparisons + +* Regex-based dispatch ID filtering + +* Roofline analysis + +* Detailed performance counters and metrics per hardware component, such as: + + * Command Processor - Fetch (CPF) / Command Processor - Controller (CPC) + + * Workgroup Manager (SPI) + + * Shader Sequencer (SQ) + + * Shader Sequencer Controller (SQC) + + * L1 Address Processing Unit, a.k.a. Texture Addresser (TA) / L1 Backend Data + Processing Unit, a.k.a. Texture Data (TD) + + * L1 Cache (TCP) + + * L2 Cache (TCC) (both aggregated and per-channel perf info) + +See the full list of :ref:`ROCm Compute Profiler's analysis panels `. + +.. _analysis-sol: + +Speed-of-Light +-------------- + +Speed-of-Light panels are provided at both the system and per hardware component +level to help diagnose performance bottlenecks. The performance numbers of the +workload under testing are compared to the theoretical maximum, such as floating +point operations, bandwidth, cache hit rate, etc., to indicate the available +room to further utilize the hardware capability. + +.. _analysis-normalizations: + +Normalizations +-------------- + +Multiple performance number normalizations are provided to allow performance +inspection within both hardware and software contexts. The following +normalizations are available. + +* ``per_wave`` + +* ``per_cycle`` + +* ``per_kernel`` + +* ``per_second`` + +See :ref:`normalization-units` to learn more about ROCm Compute Profiler normalizations. + +.. _analysis-baseline-comparison: + +Baseline comparison +------------------- + +ROCm Compute Profiler enables baseline comparison to allow checking A/B effects. Currently, +baseline comparison is limited to the same :ref:`SoC `.
Cross +comparison between SoCs is in development. + +For both the Current Workload and the Baseline Workload, you can independently +set up the following filters to allow fine-grained comparisons: + +* Workload Name + +* GPU ID filtering (multi-selection) + +* Kernel Name filtering (multi-selection) + +* Dispatch ID filtering (regex filtering) + +* ROCm Compute Profiler Panels (multi-selection) + +.. _analysis-regex-dispatch-id: + +Regex-based dispatch ID filtering +--------------------------------- + +ROCm Compute Profiler supports dispatch ID filtering based on regular expressions (regex), a standard Linux +string matching syntax, so that you can flexibly choose the +kernel invocations to inspect. + +For example, to inspect Dispatch Range from 17 to 48, inclusive, the +corresponding regex is: ``(1[7-9]|[23]\d|4[0-8])``. + +.. tip:: + + Try `Regex Numeric Range Generator `_ for help + generating typical number ranges. + +.. _analysis-incremental-profiling: + +Incremental profiling +--------------------- + +ROCm Compute Profiler supports incremental profiling to speed up performance analysis. + +Refer to the :ref:`profiling-hw-component-filtering` section for this command. + +By default, the entire application is profiled to collect performance counters +for all hardware blocks, giving a complete view of where the workload stands in +terms of performance optimization opportunities and bottlenecks. + +You can choose to focus on only a few hardware components -- for example L1 +cache or LDS -- to closely check the effect of software optimizations, without +performing application replay for *all* other hardware components. This saves +a lot of compute time. In addition, prior profiling results for other hardware +components are not overwritten; instead, they can be merged during the import to +piece together an overall profile of the system. + +..
_analysis-color-coding: + +Color coding +------------ + +Uniform color coding applies to most visualizations -- including bar graphs, +tables, and diagrams -- for easy inspection. As a rule of thumb, *yellow* means +over 50%, while *red* means over 90%. + +Global variables and configurations +----------------------------------- + +.. image:: ../../data/analyze/global_variables.png + :align: center + :alt: ROCm Compute Profiler global variables and configurations + :width: 800 + +.. _grafana-gui-import: + +Grafana GUI import +------------------ + +The ROCm Compute Profiler database ``--import`` option imports the raw profiling data to +Grafana's backend MongoDB database. This step is only required for Grafana +GUI-based performance analysis. + +The default username and password for MongoDB (to be used in database mode) are as +follows: + +* **Username**: ``temp`` + +* **Password**: ``temp123`` + +Each workload is imported to a separate database with the following naming +convention: + +.. code-block:: shell + + rocprofiler-compute_<team>_<database>_<soc> + +For example: + +.. code-block:: shell + + rocprofiler-compute_asw_vcopy_mi200 + +When using :ref:`database mode `, be sure to tailor the +connection options to the machine hosting your +:doc:`server-side instance `. Below is the sample +command to import the *vcopy* profiling data, assuming our host machine is +called ``dummybox``. + +.. _grafana-gui-remove: + +..
code-block:: shell-session + + $ rocprof-compute database --help + usage: + + rocprof-compute database [connection options] + + + + ------------------------------------------------------------------------------- + + Examples: + + rocprof-compute database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/ + + rocprof-compute database --remove -H pavii1 -u temp -w rocprofiler-compute_asw_sample_mi200 + + ------------------------------------------------------------------------------- + + + + Help: + -h, --help show this help message and exit + + General Options: + -v, --version show program's version number and exit + -V, --verbose Increase output verbosity (use multiple times for higher levels) + -s, --specs Print system specs. + + Interaction Type: + -i, --import Import workload to ROCm Compute Profiler DB + -r, --remove Remove a workload from ROCm Compute Profiler DB + + Connection Options: + -H , --host Name or IP address of the server host. + -P , --port TCP/IP Port. (DEFAULT: 27018) + -u , --username Username for authentication. + -p , --password The user's password. (will be requested later if it's not set) + -t , --team Specify Team prefix. + -w , --workload Specify name of workload (to remove) or path to workload (to import) + --kernel-verbose Specify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5) + + +ROCm Compute Profiler import for vcopy: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: shell-session + + $ rocprof-compute database --import -H dummybox -u temp -t asw -w workloads/vcopy/mi200/ + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + + Pulling data from /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + The directory exists + Found sysinfo file + KernelName shortening enabled + Kernel name verbose level: 2 + Password: + Password received + -- Conversion & Upload in Progress -- + 0%| | 0/11 [00:00` + + - Kernel time histogram + + - Top ten bottleneck kernels + +- :ref:`System Speed-of-Light ` + + - Speed-of-Light + + - System Info table + +- :ref:`Memory Chart Analysis ` + +- :ref:`Roofline Analysis ` + + - FP32/FP64 + + - FP16/INT8 + +- :ref:`Command Processor ` + + - Command Processor - Fetch (CPF) + + - Command Processor - Controller (CPC) + +- :ref:`Workgroup Manager or Shader Processor Input (SPI) ` + + - SPI Stats + + - SPI Resource Allocations + +- :ref:`Wavefront Launch ` + + - Wavefront Launch Stats + + - Wavefront runtime stats + + - per-SE Wavefront Scheduling performance + +- :ref:`Wavefront Lifetime ` + + - Wavefront lifetime breakdown + + - per-SE wavefront life (average) + + - per-SE wavefront life (histogram) + +- :ref:`Wavefront Occupancy ` + + - per-SE wavefront occupancy + + - per-CU wavefront occupancy + +- :ref:`Compute Unit - Instruction Mix ` + + - per-wave Instruction mix + + - per-wave VALU Arithmetic instruction mix + + - per-wave MFMA Arithmetic instruction mix + +- :ref:`Compute Unit - Compute Pipeline ` + + - Speed-of-Light: Compute Pipeline + + - Arithmetic OPs count + + - Compute pipeline stats + + - Memory latencies + +- :ref:`Local Data Share (LDS) ` + + - Speed-of-Light: LDS + + - LDS stats + +- 
:ref:`Instruction Cache ` + + - Speed-of-Light: Instruction Cache + + - Instruction Cache Accesses + +- Constant Cache + + - Speed-of-Light: Constant Cache + + - Constant Cache Accesses + + - Constant Cache - L2 Interface stats + +- :ref:`Texture Addresser and Texture Data ` + + - Texture Addresser (TA) + + - Texture Data (TD) + +- L1 Cache + + - Speed-of-Light: L1 Cache + + - L1 Cache Accesses + + - L1 Cache Stalls + + - L1 - L2 Transactions + + - L1 - UTCL1 Interface stats + +- :ref:`L2 Cache ` + + - Speed-of-Light: L2 Cache + + - L2 Cache Accesses + + - L2 - EA Transactions + + - L2 - EA Stalls + +- :ref:`L2 Cache Per Channel Performance ` + + - Per-channel L2 Hit rate + + - Per-channel L1-L2 Read requests + + - Per-channel L1-L2 Write Requests + + - Per-channel L1-L2 Atomic Requests + + - Per-channel L2-EA Read requests + + - Per-channel L2-EA Write requests + + - Per-channel L2-EA Atomic requests + + - Per-channel L2-EA Read latency + + - Per-channel L2-EA Write latency + + - Per-channel L2-EA Atomic latency + + - Per-channel L2-EA Read stall (I/O, GMI, HBM) + + - Per-channel L2-EA Write stall (I/O, GMI, HBM, Starve) + +Most panels are designed around a specific hardware component block to +thoroughly understand its behavior. Additional panels, including custom panels, +could also be added to aid the performance analysis. + +.. _grafana-panel-sys-info: + +System Info +^^^^^^^^^^^ + +.. figure:: ../../data/analyze/grafana/system-info_panel.png + :align: center + :alt: System details logged from the host machine + :width: 800 + + System details logged from the host machine. + +.. _grafana-panel-kernel-stats: + +Kernel Statistics +^^^^^^^^^^^^^^^^^ + +Kernel Time Histogram ++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/Kernel_time_histogram.png + :align: center + :alt: Kernel time histogram panel in ROCm Compute Profiler Grafana + :width: 800 + + Mapping application kernel launches to execution duration. 
+ +Top Bottleneck Kernels +++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/top-stat_panel.png + :align: center + :alt: Top bottleneck kernels panel in ROCm Compute Profiler Grafana + :width: 800 + + Top N kernels and relevant statistics. Sorted by total duration. + +Top Bottleneck Dispatches ++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/Top_bottleneck_dispatches.png + :align: center + :alt: Top bottleneck dispatches panel in ROCm Compute Profiler Grafana + :width: 800 + + Top N kernel dispatches and relevant statistics. Sorted by total duration. + +Current and Baseline Dispatch IDs (Filtered) +++++++++++++++++++++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/Current_and_baseline_dispatch_ids.png + :align: center + :alt: Current and baseline dispatch IDs panel in ROCm Compute Profiler Grafana + :width: 800 + + List of all kernel dispatches. + +.. _grafana-panel-system-sol: + +System Speed-of-Light +^^^^^^^^^^^^^^^^^^^^^ + +.. figure:: ../../data/analyze/grafana/sol_panel.png + :align: center + :alt: System Speed-of-Light panel in ROCm Compute Profiler Grafana + :width: 800 + + Key metrics from various sections of ROCm Compute Profiler’s profiling report. + +.. tip:: + + See :doc:`/conceptual/system-speed-of-light` to learn about reported metrics. + +.. _grafana-panel-memory-chart-analysis: + +Memory Chart Analysis +^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + The Memory Chart Analysis panel supports multiple normalizations. Due to limited + space, all transactions, when normalized to ``per_second``, default to units of + billion transactions per second. + +.. figure:: ../../data/analyze/grafana/memory-chart_panel.png + :align: center + :alt: Memory Chart Analysis panel in ROCm Compute Profiler Grafana + :width: 800 + + A graphical representation of performance data for memory blocks on the GPU. + + +.. _grafana-panel-roofline-analysis: + +Empirical Roofline Analysis +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +..
figure:: ../../data/analyze/grafana/roofline_panel.png + :align: center + :alt: Roofline Analysis panel in ROCm Compute Profiler Grafana + :width: 800 + + Visualize achieved performance relative to a benchmarked peak performance. + + +.. _grafana-panel-cp: + +Command Processor +^^^^^^^^^^^^^^^^^ + +.. tip:: + + See :doc:`/conceptual/command-processor` to learn about reported metrics. + +Command Processor Fetcher ++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cpc_panel.png + :align: center + :alt: Command Processor Fetcher panel in ROCm Compute Profiler Grafana + :width: 800 + + Fetches commands out of memory to hand them over to the Command Processor + Compute (CPC) for processing. + +Command Processor Compute ++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cpf_panel.png + :align: center + :alt: Command Processor Compute panel in ROCm Compute Profiler Grafana + :width: 800 + + The micro-controller running the command processing firmware that decodes the + fetched commands, and (for kernels) passes them to the Workgroup Managers + (SPIs) for scheduling. + +.. _grafana-panel-spi: + +Shader Processor Input (SPI) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. tip:: + + See :ref:`desc-spi` to learn about reported metrics. + +SPI Stats ++++++++++ + +.. figure:: ../../data/analyze/grafana/spi-stats_panel.png + :align: center + :alt: SPI Stats panel in ROCm Compute Profiler Grafana + :width: 800 + +.. + TODO: Add caption after merge + +SPI Resource Allocation ++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/spi-resource-allocation_panel.png + :align: center + :alt: SPI Resource Allocation panel in ROCm Compute Profiler Grafana + :width: 800 + +.. + TODO: Add caption after merge + +.. _grafana-panel-wavefront: + +Wavefront +^^^^^^^^^ + +Wavefront Launch Stats +++++++++++++++++++++++ + +..
figure:: ../../data/analyze/grafana/wavefront-launch-stats_panel.png + :align: center + :alt: Wavefront Launch Stats panel in ROCm Compute Profiler Grafana + :width: 800 + + General information about the kernel launch. + +.. tip:: + + See :ref:`wavefront-launch-stats` to learn about reported metrics. + +Wavefront Runtime Stats ++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/wavefront-runtime-stats_panel.png + :align: center + :alt: Wavefront Runtime Stats panel in ROCm Compute Profiler Grafana. + :width: 800 + + High-level overview of the execution of wavefronts in a kernel. + +.. tip:: + + See :ref:`wavefront-runtime-stats` to learn about reported metrics. + +.. _grafana-panel-cu-instruction-mix: + +Compute Unit - Instruction Mix +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Instruction Mix ++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-inst-mix_panel.png + :align: center + :alt: Instruction Mix panel in ROCm Compute Profiler Grafana + :width: 800 + + Breakdown of the various types of instructions executed by the user’s kernel, + and which pipelines on the Compute Unit (CU) they were executed on. + +.. tip:: + + See :ref:`instruction-mix` to learn about reported metrics. + +VALU Arithmetic Instruction Mix ++++++++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-value-arith-instr-mix_panel.png + :align: center + :alt: VALU Arithmetic Instruction Mix panel in ROCm Compute Profiler Grafana + :width: 800 + + The various types of vector instructions that were issued to the vector + arithmetic logic unit (VALU). + +.. tip:: + + See :ref:`valu-arith-instruction-mix` to learn about reported metrics. + +MFMA Arithmetic Instruction Mix ++++++++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png + :align: center + :alt: MFMA Arithmetic Instruction Mix panel in ROCm Compute Profiler Grafana + :width: 800 + + The types of Matrix Fused Multiply-Add (MFMA) instructions that were issued. 
+ +.. tip:: + + See :ref:`mfma-instruction-mix` to learn about reported metrics. + +VMEM Arithmetic Instruction Mix ++++++++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-vmem-instr-mix_panel.png + :align: center + :alt: VMEM Arithmetic Instruction Mix panel in ROCm Compute Profiler Grafana + :width: 800 + + The types of vector memory (VMEM) instructions that were issued. + +.. tip:: + + See :ref:`vmem-instruction-mix` to learn about reported metrics. + +.. _grafana-panel-cu-compute-pipeline: + +Compute Unit - Compute Pipeline +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Speed-of-Light +++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-sol_panel.png + :align: center + :alt: Speed-of-Light (CU) panel in ROCm Compute Profiler Grafana + :width: 800 + + The number of floating-point and integer operations executed on the vector + arithmetic logic unit (VALU) and Matrix Fused Multiply-Add (MFMA) units in + various precisions. + +.. tip:: + + See :ref:`compute-speed-of-light` to learn about reported metrics. + +Pipeline Stats +++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-pipeline-stats_panel.png + :align: center + :alt: Pipeline Stats panel in ROCm Compute Profiler Grafana + :width: 800 + + More detailed metrics to analyze the several independent pipelines found in + the Compute Unit (CU). + +.. tip:: + + See :ref:`pipeline-stats` to learn about reported metrics. + +Arithmetic Operations ++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/cu-arith-ops_panel.png + :align: center + :alt: Arithmetic Operations panel in ROCm Compute Profiler Grafana + :width: 800 + + The total number of floating-point and integer operations executed in various + precisions. + +.. tip:: + + See :ref:`arithmetic-operations` to learn about reported metrics. + +.. _grafana-panel-lds: + +Local Data Share (LDS) +^^^^^^^^^^^^^^^^^^^^^^ + +Speed-of-Light +++++++++++++++ + +.. 
figure:: ../../data/analyze/grafana/lds-sol_panel.png + :align: center + :alt: Speed-of-Light (LDS) panel in ROCm Compute Profiler Grafana + :width: 800 + + Key metrics for the Local Data Share (LDS) as a comparison with the peak + achievable values of those metrics. + +.. tip:: + + See :ref:`lds-sol` to learn about reported metrics. + +LDS Stats ++++++++++ + +.. figure:: ../../data/analyze/grafana/lds-stats_panel.png + :align: center + :alt: LDS Stats panel in ROCm Compute Profiler Grafana + :width: 800 + + More detailed view of the Local Data Share (LDS) performance. + +.. tip:: + + See :ref:`lds-stats` to learn about reported metrics. + +.. _grafana-panel-instruction-cache: + +Instruction Cache +^^^^^^^^^^^^^^^^^ + +Speed-of-Light +++++++++++++++ + +.. figure:: ../../data/analyze/grafana/instr-cache-sol_panel.png + :align: center + :alt: Speed-of-Light (instruction cache) panel in ROCm Compute Profiler Grafana + :width: 800 + + Key metrics of the L1 Instruction (L1I) cache as a comparison with the peak + achievable values of those metrics. + +.. tip:: + + See :ref:`desc-l1i-sol` to learn about reported metrics. + +Instruction Cache Stats ++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/instr-cache-accesses_panel.png + :align: center + :alt: Instruction Cache Stats panel in ROCm Compute Profiler Grafana + :width: 800 + + More detail on the hit/miss statistics of the L1 Instruction (L1I) cache. + +.. tip:: + + See :ref:`desc-l1i-stats` to learn about reported metrics. + +.. _grafana-panel-sl1d-cache: + +Scalar L1D Cache +^^^^^^^^^^^^^^^^ + +.. tip:: + + See :ref:`desc-sl1d` to learn about reported metrics. + +Speed-of-Light +++++++++++++++ + +.. figure:: ../../data/analyze/grafana/sl1d-sol_panel.png + :align: center + :alt: Speed-of-Light (SL1D) panel in ROCm Compute Profiler Grafana + :width: 800 + + Key metrics of the Scalar L1 Data (sL1D) cache as a comparison with the peak + achievable values of those metrics. + +.. 
tip:: + + See :ref:`desc-sl1d-sol` to learn about reported metrics. + +Scalar L1D Cache Accesses ++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/sl1d-cache-accesses_panel.png + :align: center + :alt: Scalar L1D Cache Accesses panel in ROCm Compute Profiler Grafana + :width: 800 + + More detail on the types of accesses made to the Scalar L1 Data (sL1D) cache, + and the hit/miss statistics. + +.. tip:: + + See :ref:`desc-sl1d-stats` to learn about reported metrics. + +Scalar L1D Cache - L2 Interface ++++++++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/sl1d-l12-interface_panel.png + :align: center + :alt: Scalar L1D Cache - L2 Interface panel in ROCm Compute Profiler Grafana + :width: 800 + + More detail on the data requested across the Scalar L1 Data (sL1D) cache <-> + L2 interface. + +.. tip:: + + See :ref:`desc-sl1d-l2-interface` to learn about reported metrics. + +.. _grafana-panel-ta: + +Texture Address and Texture Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Texture Addresser ++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/ta_panel.png + :align: center + :alt: Texture Addresser panel in ROCm Compute Profiler Grafana + :width: 800 + + Metrics specific to the texture addresser (TA), which receives commands (e.g., + instructions) and write/atomic data from the Compute Unit (CU), and coalesces + them into fewer requests for the cache to process. + +.. tip:: + + See :ref:`desc-ta` to learn about reported metrics. + +.. _grafana-panel-td: + +Texture Data +++++++++++++ + +.. figure:: ../../data/analyze/grafana/td_panel.png + :align: center + :alt: Texture Data panel in ROCm Compute Profiler Grafana + :width: 800 + + Metrics specific to texture data (TD), which routes data back to the + requesting Compute Unit (CU). + +.. tip:: + + See :ref:`desc-td` to learn about reported metrics. + +.. _grafana-panel-vl1d: + +Vector L1 Data Cache +^^^^^^^^^^^^^^^^^^^^ + +Speed-of-Light +++++++++++++++ + +..
figure:: ../../data/analyze/grafana/vl1d-sol_panel.png + :align: center + :alt: Speed-of-Light (VL1D) panel in ROCm Compute Profiler Grafana + :width: 800 + + Key metrics of the vector L1 data (vL1D) cache as a comparison with the peak + achievable values of those metrics. + +.. tip:: + + See :ref:`vl1d-sol` to learn about reported metrics. + +L1D Cache Stalls +++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/vl1d-cache-stalls_panel.png + :align: center + :alt: L1D Cache Stalls panel in ROCm Compute Profiler Grafana + :width: 800 + + More detail on where vector L1 data (vL1D) cache is stalled in the pipeline, + which may indicate performance limiters of the cache. + +.. tip:: + + See :ref:`vl1d-cache-stall-metrics` to learn about reported metrics. + +L1D Cache Accesses +++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/vl1d-cache-accesses_panel.png + :align: center + :alt: L1D Cache Accesses + :width: 800 + + The type of requests incoming from the cache front-end, the number of requests + that were serviced by the vector L1 data (vL1D) cache, and the number & type + of outgoing requests to the L2 cache. + +.. tip:: + + See :ref:`vl1d-cache-access-metrics` to learn about reported metrics. + +L1D - L2 Transactions ++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/vl1d-l2-transactions_panel.png + :align: center + :alt: L1D - L2 Transactions in ROCm Compute Profiler Grafana + :width: 800 + + A more granular look at the types of requests made to the L2 cache. + +.. tip:: + + See :ref:`vl1d-l2-transaction-detail` to learn more. + +L1D Addr Translation +++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/vl1d-addr-translation_panel.png + :align: center + :alt: L1D Addr Translation panel in ROCm Compute Profiler Grafana + :width: 800 + + After a vector memory instruction has been processed/coalesced by the address + processing unit of the vector L1 data (vL1D) cache, it must be translated + from a virtual to physical address. 
These metrics provide more details on the + L1 Translation Lookaside Buffer (TLB) which handles this process. + +.. tip:: + + See :ref:`desc-utcl1` to learn about reported metrics. + +.. _grafana-panel-l2-cache: + +L2 Cache +^^^^^^^^ + +.. tip:: + + See :doc:`/conceptual/l2-cache` to learn about reported metrics. + +Speed-of-Light +++++++++++++++ + +.. figure:: ../../data/analyze/grafana/l2-sol_panel.png + :align: center + :alt: Speed-of-Light (L2 cache) panel in ROCm Compute Profiler Grafana + :width: 800 + + Key metrics about the performance of the L2 cache, aggregated over all the + L2 channels, as a comparison with the peak achievable values of those + metrics. + +.. tip:: + + See :ref:`l2-sol` to learn about reported metrics. + +L2 Cache Accesses ++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/l2-accesses_panel.png + :align: center + :alt: L2 Cache Accesses panel in ROCm Compute Profiler Grafana + :width: 800 + + Incoming requests to the L2 cache from the vector L1 data (vL1D) cache and + other clients (e.g., the sL1D and L1I caches). + +.. tip:: + + See :ref:`l2-cache-accesses` to learn about reported metrics. + +L2 - Fabric Transactions +++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/l2-fabric-transactions_panel.png + :align: center + :alt: L2 - Fabric Transactions panel in ROCm Compute Profiler Grafana + :width: 800 + + More detail on the flow of requests through Infinity Fabric™. + +.. tip:: + + See :ref:`l2-fabric` to learn about reported metrics. + +L2 - Fabric Interface Stalls +++++++++++++++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/l2-fabric-interface-stalls_panel.png + :align: center + :alt: L2 - Fabric Interface Stalls panel in ROCm Compute Profiler Grafana + :width: 800 + + A breakdown of what types of requests in a kernel caused a stall + (e.g., read vs write), and to which locations (e.g., to the accelerator’s + local memory, or to remote accelerators/CPUs). + +.. 
tip:: + + See :ref:`l2-fabric-stalls` to learn about reported metrics. + +.. _grafana-panel-l2-cache-per-channel: + +L2 Cache Per Channel +^^^^^^^^^^^^^^^^^^^^ + +.. tip:: + + See :ref:`l2-sol` for more information. + +Aggregate Stats ++++++++++++++++ + +.. figure:: ../../data/analyze/grafana/l2-per-channel-agg-stats_panel.png + :align: center + :alt: Aggregate Stats (L2 cache per channel) panel in ROCm Compute Profiler Grafana + :width: 800 + + L2 Cache per channel performance at a glance. Metrics are aggregated over all available channels. diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst b/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst new file mode 100644 index 0000000000..6ca7f4a773 --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst @@ -0,0 +1,37 @@ +.. meta:: + :description: How to use ROCm Compute Profiler's analyze mode + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, + Grafana, analysis, analyze mode + +************ +Analyze mode +************ + +ROCm Compute Profiler offers several ways to interact with the metrics it generates from +profiling. Your level of familiarity with the profiled application, computing +environment, and experience with ROCm Compute Profiler should inform the analysis method you +choose. + +While analyzing with the CLI offers quick and straightforward access to ROCm Compute Profiler +metrics from the terminal, Grafana's dashboard GUI adds an extra layer of +readability and interactivity you might prefer. + +See the following sections to explore ROCm Compute Profiler's analysis and visualization +options. + +* :doc:`cli` +* :doc:`grafana-gui` +* :doc:`standalone-gui` +* :doc:`text-based user interface (TUI)` + +.. note:: + + Analysis examples in this chapter borrow profiling results from the + ``vcopy.cpp`` workload introduced in :ref:`profile-example` in the + previous chapter. 
+ + Unless otherwise noted, the performance analysis is done on the + :ref:`MI200 platform `. + +Learn about profiling with ROCm Compute Profiler in :doc:`../profile/mode`. For an overview of +ROCm Compute Profiler's other modes, see :ref:`modes`. diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst new file mode 100644 index 0000000000..1c79c816d9 --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst @@ -0,0 +1,94 @@ +.. meta:: + :description: ROCm Compute Profiler analysis: Standalone GUI + :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, GUI, standalone, filter + +*********************** +Standalone GUI analysis +*********************** + +ROCm Compute Profiler's standalone analysis GUI is a lightweight web page that you can +generate straight from the command line. The standalone analysis GUI is an +alternative to the CLI if you want to explore profiling results visually, but +without the additional setup requirements or server-side overhead of ROCm Compute Profiler's +detailed :doc:`Grafana interface ` option. This analysis +option is implemented as a simple `Flask `_ +application that lets you view results from your preferred web browser. + +.. note:: + + A point on *port forwarding*: the standalone GUI analyzer publishes its + web-based interface on port ``8050`` by default. On production HPC systems + where profiling jobs run under the control of a resource manager, additional + SSH tunneling between the desired web browser host (such as a login node or + remote workstation) and compute host may be required. Alternatively, you + might find it more convenient to download profiled workloads to perform + analysis on a local system. + + See the :doc:`/reference/faq` for more details on SSH tunneling. 
+ +Launch the standalone GUI analyzer +---------------------------------- + +To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your +desired analysis command. For example: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vcopy/MI200/ --gui + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + Analysis mode = web_ui + [analysis] deriving rocprofiler-compute metrics... + Dash is running on http://0.0.0.0:8050/ + + * Serving Flask app 'rocprof_compute_analyze.analysis_webui' (lazy loading) + * Environment: production + WARNING: This is a development server. Do not use it in a production deployment. + Use a production WSGI server instead. + * Debug mode: off + * Running on all addresses (0.0.0.0) + WARNING: This is a development server. Do not use it in a production deployment. + * Running on http://127.0.0.1:8050 + * Running on http://10.228.33.172:8050 (Press CTRL+C to quit) + +At this point, you can launch your web browser of choice and navigate to +``http://localhost:8050/`` to view the analysis interface. + +.. image:: ../../data/analyze/standalone_gui.png + :align: center + :alt: ROCm Compute Profiler standalone GUI home screen + :width: 800 + +.. tip:: + + To launch the standalone GUI analyzer web app on a port other than ``8050``, + include the optional argument ``--gui ``. + +When no filters are applied, you'll see five basic sections derived from your +application's profiling data: + +#. Memory Chart Analysis +#. Empirical Roofline Analysis + + Use ``--roofline-data-type`` option to specify which data type(s) you would like displayed on the roofline PDFs in the standalone analysis GUI. 
+ Data types can be stacked- for example, "--roofline-data-type FP32 FP64 I32" would display one PDF with FP32 and FP64 stacked, and one PDF with INT32. + Default roofline data type plotted is FP32. + +#. Top Stats (Top Kernel Statistics) +#. System Info +#. System Speed-of-Light + +To dive deeper, use the dropdown menus at the top of the screen to isolate +particular kernels or dispatches. You should see the web page update with +metrics specific to your selected filters. + +Once a filter is applied, you'll see several additional sections become +available with detailed metrics specific to that area of AMD hardware. These +detailed sections mirror the data displayed in ROCm Compute Profiler's +:doc:`Grafana interface `. diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/tui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/tui.rst new file mode 100644 index 0000000000..5a0f92ff51 --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/analyze/tui.rst @@ -0,0 +1,72 @@ +.. meta:: + :description: ROCm Compute Profiler analysis: Text-based User Interface + :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, GUI, standalone, filter + +**************************************** +Text-based User Interface (TUI) analysis +**************************************** + +ROCm Compute Profiler's analyze mode now supports a lightweight Text-based User Interface (TUI) +that provides an interactive terminal experience for enhanced usability. You can use the TUI +interface as a more visually engaging and interactive alternative to explore analysis results +compared to the standard :doc:`cli`. It provides enhanced visual feedback and easy navigation without +needing the extra setup of a full graphical interface. This analysis option is implemented as a +terminal-based interface that offers real-time visual feedback, keyboard shortcuts for common +actions, and improved readability with formatted output. + +.. 
note:: + + TUI is currently in an early access state. While functional, you may encounter minor issues or limitations. + For the limitations identified, see :ref:`Current limitations <tui-limitation>`. + Running production workloads is not recommended. + +Launch the TUI analyzer +---------------------------------- + +1. Use the ``--tui`` flag with the analysis command to launch the ROCm Compute Profiler TUI analyzer. +For example: + +.. code-block:: shell-session + + $ rocprof-compute analyze --tui + +2. To start the analysis, use the dropdown menu at the top left of the screen to select a single +workload from ``rocprof-compute profile`` generated output directories. + +.. image:: ../../data/analyze/tui.png + :align: center + :alt: ROCm Compute Profiler TUI home screen + :width: 800 + +3. You can see the center window update with collapsed contents. Expand the sections to view tables, charts, +and graphs visualizing the analysis data. + +4. After the analysis results are loaded, you can start interactive analysis with detailed metrics. +The TUI supports basic keyboard shortcuts, including quit application commands for easy navigation. + +TUI analysis structure +---------------------------------- + +Unlike the :doc:`cli` plain style interfaces, the TUI restructures the analysis workflow into four +hierarchical categories to provide a more organized, top-down analysis approach: + +1. Top Stat +2. High Level analysis +3. Detailed block analysis +4. Source Level analysis + +It is recommended to follow this top-down hierarchical structure to conduct a thorough performance +analysis, starting with the broad overview and progressively drilling down to specific details. + +.. _tui-limitation: + +Current limitations +---------------------------------- +The TUI implementation has several limitations that will be addressed in future releases: + +* **PC sampling**: Source Level analysis does not have PC sampling enabled by default during the + profiling stage.
Refer to :doc:`../pc_sampling` for details on how to build and enable PC sampling + manually. + +* **Filtering capabilities**: Advanced filtering options such as kernel filtering and dispatch + filtering are currently not supported. These features will be available in upcoming releases. diff --git a/projects/rocprofiler-compute/docs/how-to/pc_sampling.rst b/projects/rocprofiler-compute/docs/how-to/pc_sampling.rst new file mode 100644 index 0000000000..64aff4ae2a --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/pc_sampling.rst @@ -0,0 +1,74 @@ +.. meta:: + :description: ROCm Compute Profiler: using PC sampling + :keywords: ROCm Compute Profiler, PC sampling + +******************************************** +Using PC sampling in ROCm Compute Profiler +******************************************** + +Program Counter (PC) sampling service for GPU profiling is a profiling technique that periodically samples the program counter during the GPU kernel execution to understand code execution patterns and hotspots. + +ROCm Compute Profiler supports Host Trap PC sampling and Stochastic (Hardware-Based) PC sampling. +Host Trap PC sampling is enabled for AMD Instinct MI200 series and later +accelerators. Stochastic (hardware-based) PC sampling is enabled for +AMD Instinct MI300 series and later accelerators. Stochastic PC sampling provides additional information that tells whether a sampled wave issued an instruction for a particular PC. It also provides the reason +for not issuing the instruction (stall reason). This type of information is +particularly useful for understanding stalls during the kernel execution. The PC sampling can be used with profiling and analysis options. 
+ +--------------------- +Profiling options +--------------------- +To profile with PC sampling, use the following options: + +* ``--pc-sampling-method``: Should be either ``stochastic`` or ``host_trap`` (DEFAULT: stochastic) +* ``--pc-sampling-interval``: For stochastic sampling, the interval is in cycles. The finest granularity is 1 cycle. For ``host_trap`` sampling, the interval is in microseconds (DEFAULT: 1048576). The interval should be a power of 2. It is recommended to start from 1048576 and lower the interval until reaching 65536. + +**Sample command:** + +.. code-block:: shell + + $ rocprof-compute profile -n pc_test -b 21 --no-roof --pc-sampling-method stochastic --pc-sampling-interval 1048576 -VVV -- target_app + +----------------------- +Analysis options +----------------------- +To analyze PC sampling data, use the following option: + +* ``--pc-sampling-sorting-type``: ``offset`` or ``count``. The default option is ``offset``. ``offset`` is an assembly instruction offset in the code object. + +**Sample command:** + +.. code-block:: shell + + $ rocprof-compute analyze -p workloads/pc_test/MI300A_A1/ -b 21 -k 0 --pc-sampling-sorting-type offset + +**Sample output:** + +Selecting single kernel host trap PC sampling: + +.. image:: ../data/pc_sampling/pc_sampling_host_trap_single_kernel.png + :align: left + :alt: Host trap PC sampling snapshot + +Selecting single kernel stochastic PC sampling: + +.. image:: ../data/pc_sampling/pc_sampling_stochastic_single_kernel.png + :align: left + :alt: Stochastic PC sampling snapshot + +If you don't filter by kernel, the output will fall back to the original data from ``rocprofv3`` csv output for all the kernels: + +.. image:: ../data/pc_sampling/pc_sampling_no_kernel_filtering.png + :align: left + :alt: Host trap PC sampling snapshot no_kernel_filtering + +Selecting single kernel sorting by PC count: + +.. 
image:: ../data/pc_sampling/pc_sampling_sort_by_count.png + :align: left + :alt: Host trap PC sampling sorting snapshot + +.. note:: + + * The PC sampling feature is currently in beta. To enable PC sampling, you have to explicitly enable it with block index 21. + * To associate PC sampling info back to HIP source code, you need to build the profiling target app with ``-g`` to keep the symbols. Otherwise, PC sampling info will only be associated with assembly lines. diff --git a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst new file mode 100644 index 0000000000..834358c0ba --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst @@ -0,0 +1,537 @@ +.. meta:: + :description: How to use ROCm Compute Profiler's profile mode + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, + profiling, profile mode + +************ +Profile mode +************ + +The following chapter walks you through ROCm Compute Profiler's core profiling features by +example. + +Learn about analysis with ROCm Compute Profiler in :doc:`../analyze/mode`. For an overview of +ROCm Compute Profiler's other modes, see :ref:`modes`. + +Profiling +========= + +Use the ``rocprof-compute`` executable to acquire all necessary performance monitoring +data through analysis of compute workloads. + +Profiling with ROCm Compute Profiler yields the following benefits. + +* :ref:`Automate counter collection <profiling-routine>`: ROCm Compute Profiler handles all + of your profiling via pre-configured input files. + +* :ref:`Filtering <filtering>`: Apply runtime filters to speed up the profiling + process. + +* :ref:`Standalone roofline <standalone-roofline>`: Isolate a subset of built-in + metrics or build your own profiling configuration. + +Run ``rocprof-compute profile -h`` for more details. See +:ref:`Basic usage `. + +.. 
_profile-example: + +Profiling example +----------------- + +The ``__ repository +includes source code for a sample GPU compute workload, ``vcopy.cpp``. A copy of +this file is available in the ``share/sample`` subdirectory after a normal +ROCm Compute Profiler installation, or via the ``$ROCPROFCOMPUTE_SHARE/sample`` directory when +using the supplied modulefile. + +The examples in this section use a compiled version of the ``vcopy`` workload to +demonstrate the use of ROCm Compute Profiler in MI accelerator performance analysis. Unless +otherwise noted, the performance analysis is done on the +:ref:`MI200 platform `. + +Workload compilation +^^^^^^^^^^^^^^^^^^^^ + +The following example demonstrates compilation of ``vcopy``. + +.. code-block:: shell-session + + $ hipcc vcopy.cpp -o vcopy + $ ls + vcopy vcopy.cpp + $ ./vcopy -n 1048576 -b 256 + vcopy testing on GCD 0 + Finished allocating vectors on the CPU + Finished allocating vectors on the GPU + Finished copying vectors to the GPU + sw thinks it moved 1.000000 KB per wave + Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384: + Launching the kernel on the GPU + Finished executing kernel + Finished copying the output vector from the GPU to the CPU + Releasing GPU memory + Releasing CPU memory + +The following sample command profiles the ``vcopy`` workload. + +.. 
code-block:: shell-session + + $ rocprof-compute profile --name vcopy -- ./vcopy -n 1048576 -b 256 + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + rocprofiler-compute version: 2.0.0 + Profiler choice: rocprofv1 + Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Target: MI200 + Command: ./vcopy -n 1048576 -b 256 + Kernel Selection: None + Dispatch Selection: None + Hardware Blocks: All + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Collecting Performance Counters + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + [profiling] Current input file: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200/perfmon/SQ_IFETCH_LEVEL.txt + |-> [rocprof] RPL: on '240312_174329' from '/opt/rocm-5.2.1' in '/home/auser/repos/rocprofiler-compute/src/rocprof-compute' + |-> [rocprof] RPL: profiling '""./vcopy -n 1048576 -b 256""' + |-> [rocprof] RPL: input file '/home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200/perfmon/SQ_IFETCH_LEVEL.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_240312_174329_692890' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_240312_174329_692890/input0_results_240312_174329' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_240312_174329_692890/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 6 metrics + |-> [rocprof] GRBM_COUNT, GRBM_GUI_ACTIVE, SQ_WAVES, SQ_IFETCH, SQ_IFETCH_LEVEL, SQ_ACCUM_PREV_HIRES + |-> [rocprof] vcopy testing on GCD 0 + |-> [rocprof] Finished allocating vectors on the CPU + |-> [rocprof] Finished allocating vectors on the GPU + |-> [rocprof] Finished copying vectors to the GPU + |-> [rocprof] sw thinks it moved 1.000000 KB per wave + |-> [rocprof] Total 
threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384: + |-> [rocprof] Launching the kernel on the GPU + |-> [rocprof] Finished executing kernel + |-> [rocprof] Finished copying the output vector from the GPU to the CPU + |-> [rocprof] Releasing GPU memory + |-> [rocprof] Releasing CPU memory + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 1 contexts collected, output directory /tmp/rpl_data_240312_174329_692890/input0_results_240312_174329 + |-> [rocprof] File '/home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200/SQ_IFETCH_LEVEL.csv' is generating + |-> [rocprof] + [profiling] Current input file: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200/perfmon/SQ_INST_LEVEL_LDS.txt + + ... + + [roofline] Checking for roofline.csv in /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + [roofline] No roofline data found. Generating... + Empirical Roofline Calculation + Copyright © 2022 Advanced Micro Devices, Inc. All rights reserved. + Total detected GPU devices: 4 + GPU Device 0: Profiling... 
+ 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + HBM BW, GPU ID: 0, workgroupSize:256, workgroups:2097152, experiments:100, traffic:8589934592 bytes, duration:6.2 ms, mean:1388.0 GB/sec, stdev=3.1 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + L2 BW, GPU ID: 0, workgroupSize:256, workgroups:8192, experiments:100, traffic:687194767360 bytes, duration:136.5 ms, mean:5020.8 GB/sec, stdev=16.5 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + L1 BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:26843545600 bytes, duration:2.9 ms, mean:9229.5 GB/sec, stdev=2.9 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + LDS BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:33554432000 bytes, duration:1.9 ms, mean:17645.6 GB/sec, stdev=20.1 GB/sec + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak FLOPs (FP32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:274877906944, duration:13.078 ms, mean:20986.9 GFLOPS, stdev=310.8 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak FLOPs (FP64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:137438953472, duration:6.7 ms, mean:20408.029297.1 GFLOPS, stdev=2.7 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak MFMA FLOPs (BF16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:12.6 ms, mean:170280.0 GFLOPS, stdev=22.3 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak MFMA FLOPs (F16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:13.0 ms, mean:164733.6 GFLOPS, stdev=24.3 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak MFMA FLOPs (F32), GPU ID: 0, workgroupSize:256, workgroups:16384, 
experiments:100, FLOP:536870912000, duration:13.0 ms, mean:41399.6 GFLOPS, stdev=4.1 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak MFMA FLOPs (F64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:268435456000, duration:6.5 ms, mean:41379.2 GFLOPS, stdev=4.4 GFLOPS + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + Peak MFMA IOPs (I8), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, IOP:2147483648000, duration:12.9 ms, mean:166281.9 GOPS, stdev=2495.9 GOPS + GPU Device 1: Profiling... + ... + GPU Device 2: Profiling... + ... + GPU Device 3: Profiling... + ... + +.. tip:: + + To reduce verbosity of profiling output try the ``--quiet`` flag. This hides + ``rocprof`` output and activates a progress bar. + +.. _profiling-routine: + +Notice the two main stages in ROCm Compute Profiler's *default* profiling routine. + +1. The first stage collects all the counters needed for ROCm Compute Profiler analysis + (omitting any filters you have provided). + +2. The second stage collects data for the roofline analysis (this stage can be + disabled using ``--no-roof``). + +At the end of profiling, you can find all resulting ``csv`` files in a +:ref:`SoC `-specific target directory; for +example: + +* "MI300A" or "MI300X" for the AMD Instinct™ MI300 family of accelerators +* "MI200" for the AMD Instinct MI200 family of accelerators +* "MI100" for the AMD Instinct MI100 family of accelerators + +The SoC names are generated as a part of ROCm Compute Profiler, and do not *always* +distinguish between different accelerators in the same family; for instance, +an Instinct MI210 vs an Instinct MI250. + +.. note:: + + Additionally, you will notice a few extra files. An SoC parameters file, + ``sysinfo.csv``, is created to reflect the target device settings. All + profiling output is stored in ``log.txt``. 
Roofline-specific benchmark + results are stored in ``roofline.csv`` and roofline plots are output to PDFs named + ``empirRoof_gpu-0_[datatype1]_..._[datatypeN].pdf`` where data types requested through the + ``--roofline-data-type`` option are listed in the file name. + +.. code-block:: shell-session + + $ ls workloads/vcopy/MI200/ + total 112 + total 60 + -rw-r--r-- 1 auser agroup 27937 Mar 1 15:15 log.txt + drwxr-xr-x 1 auser agroup 0 Mar 1 15:15 perfmon + -rw-r--r-- 1 auser agroup 26175 Mar 1 15:15 pmc_perf.csv + -rw-r--r-- 1 auser agroup 1708 Mar 1 15:17 roofline.csv + -rw-r--r-- 1 auser agroup 519 Mar 1 15:15 SQ_IFETCH_LEVEL.csv + -rw-r--r-- 1 auser agroup 456 Mar 1 15:15 SQ_INST_LEVEL_LDS.csv + -rw-r--r-- 1 auser agroup 474 Mar 1 15:15 SQ_INST_LEVEL_SMEM.csv + -rw-r--r-- 1 auser agroup 474 Mar 1 15:15 SQ_INST_LEVEL_VMEM.csv + -rw-r--r-- 1 auser agroup 599 Mar 1 15:15 SQ_LEVEL_WAVES.csv + -rw-r--r-- 1 auser agroup 650 Mar 1 15:15 sysinfo.csv + -rw-r--r-- 1 auser agroup 399 Mar 1 15:15 timestamps.csv + +.. _filtering: + +Filtering +========= + +To reduce profiling time and the counters collected, you should use profiling +filters. Profiling filters and their functionality depend on the underlying +profiler being used. While ROCm Compute Profiler is profiler-agnostic, the following is a +detailed description of profiling filters available when using ROCm Compute Profiler with +:doc:`ROCProfiler `. + +Filtering options +----------------- + +``-b``, ``--block `` + Allows system profiling on one or more selected hardware report blocks to speed + up the profiling process. See :ref:`profiling-hw-component-filtering`. + +``-k``, ``--kernel `` + Allows for kernel filtering. Usage is equivalent to the current ``rocprof`` + utility. See :ref:`profiling-kernel-filtering`. + +``-d``, ``--dispatch `` + Allows for dispatch ID filtering. Usage is equivalent to the current + ``rocprof`` utility. See :ref:`profiling-dispatch-filtering`. + +.. 
tip:: + + Be cautious when combining different profiling filters in the same call. + Conflicting filters may result in error. + + For example, filtering a dispatch, but that dispatch doesn't match your + kernel name filter. + +.. _profiling-hw-component-filtering: + +Hardware report block filtering +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can profile specific hardware report blocks to speed up the profiling process. +In ROCm Compute Profiler, the term hardware report block refers to a section of the +analysis report which focuses on metrics associated with a hardware component or +a group of hardware components. All profiling results are accumulated in the same +target directory without overwriting those for other hardware components. +This enables incremental profiling and analysis. + +The following example only gathers hardware counters used to calculate metrics +for ``Compute Unit - Instruction Mix`` (block 10) and ``Wavefront Launch Statistics`` +(block 7) sections of the analysis report, while skipping over all other hardware counters. + +.. code-block:: shell-session + + $ rocprof-compute profile --name vcopy -b 10 7 -- ./vcopy -n 1048576 -b 256 + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + rocprofiler-compute version: 2.0.0 + Profiler choice: rocprofv1 + Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Target: MI200 + Command: ./vcopy -n 1048576 -b 256 + Kernel Selection: None + Dispatch Selection: None + Hardware Blocks: [] + Report Sections: ['10', '7'] + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Collecting Performance Counters + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ... 
+ + +To see a list of available hardware report blocks, use the ``--list-metrics`` option. + +.. code-block:: shell-session + + $ rocprof-compute profile --list-metrics + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + 0 -> Top Stats + 1 -> System Info + 2 -> System Speed-of-Light + 2.1 -> Speed-of-Light + 2.1.0 -> VALU FLOPs + 2.1.1 -> VALU IOPs + 2.1.2 -> MFMA FLOPs (F8) + ... + 5 -> Command Processor (CPC/CPF) + 5.1 -> Command Processor Fetcher + 5.1.0 -> CPF Utilization + 5.1.1 -> CPF Stall + 5.1.2 -> CPF-L2 Utilization + 5.2 -> Packet Processor + 5.2.0 -> CPC Utilization + 5.2.1 -> CPC Stall Rate + 5.2.5 -> CPC-UTCL1 Stall + ... + 6 -> Workgroup Manager (SPI) + 6.1 -> Workgroup Manager Utilizations + 6.1.0 -> Accelerator Utilization + 6.1.1 -> Scheduler-Pipe Utilization + 6.1.2 -> Workgroup Manager Utilization + + +It is also possible to filter counter collection by hardware component such as Shader Sequencer (SQ) +and L2 cache (TCC) as shown below. + +.. 
code-block:: shell-session + + $ rocprof-compute profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256 + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + fname: pmc_cpc_perf: Skipped + fname: pmc_spi_perf: Skipped + fname: pmc_cpf_perf: Skipped + fname: pmc_tcp_perf: Skipped + fname: pmc_sq_perf4: Added + fname: pmc_tcc_perf: Added + fname: pmc_sq_perf8: Added + fname: pmc_ta_perf: Skipped + fname: pmc_sq_perf1: Added + fname: pmc_sq_perf3: Added + fname: pmc_td_perf: Skipped + fname: pmc_tcc2_perf: Skipped + fname: pmc_sqc_perf1: Skipped + fname: pmc_sq_perf6: Added + fname: pmc_sq_perf2: Added + rocprofiler-compute version: 2.0.0 + Profiler choice: rocprofv1 + Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Target: MI200 + Command: ./vcopy -n 1048576 -b 256 + Kernel Selection: None + Dispatch Selection: None + Hardware Blocks: ['sq', 'tcc'] + Report Sections: [] + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Collecting Performance Counters + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ... + +.. warning:: + + Filtering by hardware components (e.g. SQ, TCC) will soon be deprecated. + It is recommended to use hardware report block based filtering. + +.. _profiling-kernel-filtering: + +Kernel filtering +^^^^^^^^^^^^^^^^ + +Kernel filtering is based on the name of the kernels you want to isolate. Use a +kernel name substring list to isolate desired kernels. + +The following example demonstrates profiling while isolating the kernel matching the +substring ``vecCopy``. + +.. 
code-block:: shell-session + + $ rocprof-compute profile --name vcopy -k vecCopy -- ./vcopy -n 1048576 -b 256 + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + rocprofiler-compute version: 2.0.0 + Profiler choice: rocprofv1 + Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Target: MI200 + Command: ./vcopy -n 1048576 -b 256 + Kernel Selection: ['vecCopy'] + Dispatch Selection: None + Hardware Blocks: All + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Collecting Performance Counters + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ... + +.. _profiling-dispatch-filtering: + +Dispatch filtering +^^^^^^^^^^^^^^^^^^ + +Dispatch filtering is based on the *global* dispatch index of kernels in a run. + +The following example profiles only the first kernel dispatch in the execution +of the application (note zero-based indexing). + +.. code-block:: shell-session + + $ rocprof-compute profile --name vcopy -d 0 -- ./vcopy -n 1048576 -b 256 + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + rocprofiler-compute version: 2.0.0 + Profiler choice: rocprofv1 + Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Target: MI200 + Command: ./vcopy -n 1048576 -b 256 + Kernel Selection: None + Dispatch Selection: ['0'] + Hardware Blocks: All + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Collecting Performance Counters + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ... + +.. 
_standalone-roofline: + +Standalone roofline +=================== + +If you are only interested in generating roofline analysis data try using +``--roof-only``. This will only collect counters relevant to roofline, as well +as generate a standalone ``.pdf`` output of your roofline plot. + +Roofline options +---------------- + +``--sort `` + Allows you to specify whether you would like to overlay top kernel or top + dispatch data in your roofline plot. + +``-m``, ``--mem-level `` + Allows you to specify specific levels of cache to include in your roofline + plot. + +``--device `` + Allows you to specify a device ID to collect performance data from when + running a roofline benchmark on your system. + +``--roofline-data-type `` + Allows you to specify data types that you want plotted in the roofline PDF output(s). Selecting more than one data type will overlay the results onto the same plot. Default: FP32 + +.. note:: + + For more information on data types supported based on the GPU architecture, see :doc:`../../conceptual/performance-model` + +To distinguish different kernels in your ``.pdf`` roofline plot use +``--kernel-names``. This will give each kernel a unique marker identifiable from +the plot's key. + + +Roofline only +------------- + +The following example demonstrates profiling roofline data only: + +.. code-block:: shell-session + + $ rocprof-compute profile --name vcopy --roof-only -- ./vcopy -n 1048576 -b 256 + + ... + [roofline] Checking for roofline.csv in /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + [roofline] No roofline data found. Generating... + Checking for roofline.csv in /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Empirical Roofline Calculation + Copyright © 2022 Advanced Micro Devices, Inc. All rights reserved. + Total detected GPU devices: 4 + GPU Device 0: Profiling... + 99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ] + ... + Empirical Roofline PDFs saved! 
+ +An inspection of our workload output folder shows ``.pdf`` plots were generated +successfully. + +.. code-block:: shell-session + + $ ls workloads/vcopy/MI200/ + total 48 + -rw-r--r-- 1 auser agroup 13331 Mar 1 16:05 empirRoof_gpu-0_FP32.pdf + drwxr-xr-x 1 auser agroup 0 Mar 1 16:03 perfmon + -rw-r--r-- 1 auser agroup 1101 Mar 1 16:03 pmc_perf.csv + -rw-r--r-- 1 auser agroup 1715 Mar 1 16:05 roofline.csv + -rw-r--r-- 1 auser agroup 650 Mar 1 16:03 sysinfo.csv + -rw-r--r-- 1 auser agroup 399 Mar 1 16:03 timestamps.csv + +.. note:: + + * ROCm Compute Profiler currently captures roofline profiling for all data types, and you can reduce the clutter in the PDF outputs by filtering the data type(s). Selecting multiple data types will overlay the results into the same PDF. To generate results in separate PDFs for each data type from the same workload run, you can re-run the profiling command with each data type as long as the ``roofline.csv`` file still exists in the workload folder. + * Roofline feature is currently not enabled on AMD Instinct MI350. + +The following image is a sample ``empirRoof_gpu-0_FP32.pdf`` roofline +plot. + +.. image:: ../../data/profile/sample-roof-plot.jpg + :align: center + :alt: Sample ROCm Compute Profiler roofline output + :width: 800 diff --git a/projects/rocprofiler-compute/docs/how-to/use.rst b/projects/rocprofiler-compute/docs/how-to/use.rst new file mode 100644 index 0000000000..f361df142d --- /dev/null +++ b/projects/rocprofiler-compute/docs/how-to/use.rst @@ -0,0 +1,258 @@ +.. meta:: + :description: ROCm Compute Profiler basic usage + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, + basics, usage, operations + +*********** +Basic usage +*********** + +The following section outlines basic ROCm Compute Profiler workflows, modes, options, and +operations. + +Command line profiler +===================== + +Launch and profile the target application using the command line profiler. 
+ +The command line profiler launches the target application, calls the +ROCProfiler API via the ``rocprof`` binary, and collects profile results for +the specified kernels, dispatches, and hardware components. If not +specified, ROCm Compute Profiler defaults to collecting all available counters for all +kernels and dispatches launched by your executable. + +To collect the default set of data for all kernels in the target +application, launch, for example: + +.. code-block:: shell + + $ rocprof-compute profile -n vcopy_data -- ./vcopy -n 1048576 -b 256 + +This runs the app, launches each kernel, and generates profiling results. By +default, results are written to a subdirectory with your accelerator's name; +for example, ``./workloads/vcopy_data/MI200/``, where the workload name is configurable +via the ``-n`` argument. + +.. note:: + + To collect all requested profile information, ROCm Compute Profiler might replay kernels + multiple times. + +.. _basic-filter-data-collection: + +Customize data collection +------------------------- + +Options are available to specify for which kernels and metrics data should be +collected. Note that you can apply filtering in either the profiling or +analysis stage. Filtering at profiling collection often speeds up your +aggregate profiling run time. + +Common filters to customize data collection include: + +``-k``, ``--kernel`` + Enables filtering kernels by name. + +``-d``, ``--dispatch`` + Enables filtering based on dispatch ID. + +``-b``, ``--block`` + Enables collecting metrics for only the specified hardware report blocks. + +See :ref:`Filtering <filtering>` for an in-depth walkthrough. + +To view available metrics by hardware block, use the ``profile`` mode ``--list-metrics`` +option with an optional system architecture argument (inferred if not provided): + +.. code-block:: shell + + $ rocprof-compute profile --list-metrics + $ rocprof-compute profile --list-metrics + +.. 
_basic-analyze-cli: + +Analyze in the command line +--------------------------- + +After generating a local output folder (for example, +``./workloads/vcopy_data/MI200``), use the command line tool to quickly +interface with profiling results. View different metrics derived from your +profiled results and get immediate access to all metrics organized by hardware +blocks. + +If you don't apply kernel, dispatch, or hardware report block filters at this stage, +analysis is reflective of the entirety of the profiling data. + +To interact with profiling results from a different session, provide the +workload path. + +``-p``, ``--path`` + Enables you to analyze existing profiling data in the ROCm Compute Profiler CLI. + +See :doc:`analyze/cli` for more detailed information. + +.. _basic-analyze-grafana: + +Analyze in the Grafana GUI +-------------------------- + +To conduct a more in-depth analysis of profiling results, it's suggested to use +a Grafana GUI with ROCm Compute Profiler. To interact with profiling results, import your +data to the MongoDB instance included in the ROCm Compute Profiler Dockerfile. See +:doc:`/install/grafana-setup`. + +To interact with Grafana data stored in the ROCm Compute Profiler database, enter +``database`` :ref:`mode `; for example: + +.. code-block:: shell + + $ rocprof-compute database --import [CONNECTION OPTIONS] + +See :doc:`/how-to/analyze/grafana-gui` for more detailed information. + +.. _modes: + +Modes +===== + +Modes change the fundamental behavior of the ROCm Compute Profiler command line tool. +Depending on which mode you choose, different command line options become +available. + +.. _modes-profile: + +Profile mode +------------ + +``profile`` + Launches the target application on the local system using + :doc:`ROCProfiler `. Depending on the profiling options + chosen, selected kernels, dispatches, and/or hardware components used by the + application are profiled.
It stores results locally in an output folder: + ``./workloads/\``. + + .. code-block:: shell + + $ rocprof-compute profile --help + +See :doc:`profile/mode` to learn about this mode in depth and to get started +profiling with ROCm Compute Profiler. + +.. _modes-analyze: + +Analyze mode +------------ + +``analyze`` + Loads profiling data from the ``--path`` (``-p``) directory into the ROCm Compute Profiler + CLI analyzer where you have immediate access to profiling results and + generated metrics. It generates metrics from the entirety of your profiled + application or a subset identified through the ROCm Compute Profiler CLI analysis filters. + + To generate a lightweight GUI interface, you can add the ``--gui`` flag to your + analysis command. + + Analyze mode now supports a lightweight Text-based User Interface (TUI) that + provides an interactive terminal experience for enhanced usability. To enable TUI mode, + use the ``--tui`` flag when running the analyze command: + + .. code-block:: shell + + $ rocprof-compute analyze --tui + + This mode is a middle ground to the highly detailed ROCm Compute Profiler Grafana GUI and + is great if you want immediate access to a hardware component you’re already + familiar with. + + .. code-block:: shell + + $ rocprof-compute analyze --help + +See :doc:`analyze/mode` to learn about this mode in depth and to get started +with analysis using ROCm Compute Profiler. + +.. _modes-database: + +Database mode +------------- + +``database`` + The Grafana analyzer GUI is built on a MongoDB database. ``--import`` + profiling results to the DB to interact with the workload in Grafana or + ``--remove`` the workload from the DB. + + Connection options need to be specified. See :doc:`/how-to/analyze/grafana-gui` for + more details. + + .. 
code-block:: shell + + $ rocprof-compute database --help + +See :doc:`/install/grafana-setup` to learn about setting up a Grafana server and +database instance to make your profiling data more digestible and shareable. + +.. _global-options: + +Global options +============== + +The ROCm Compute Profiler command line tool has a set of *global* utility options that are +available across all modes. + +``-v``, ``--version`` + Prints the ROCm Compute Profiler version and exits. + +``-V``, ``--verbose`` + Increases output verbosity. Use multiple times for higher levels of + verbosity. + +``-q``, ``--quiet`` + Reduces output verbosity and runs quietly. + +``-s``, ``--specs`` + Prints system specs and exits. + +.. note:: + + ROCm Compute Profiler also recognizes the project variable, ``ROCPROFCOMPUTE_COLOR`` should you + choose to disable colorful output. To disable default colorful behavior, set + this variable to ``0``. + +.. _basic-operations: + +Basic operations +================ + +The following table lists ROCm Compute Profiler's basic operations, their +:ref:`modes `, and required arguments. + +.. 
list-table:: + :header-rows: 1 + + * - Operation description + - Mode + - Required arguments + + * - :doc:`Profile a workload ` + - ``profile`` + - ``--name``, ``-- `` + + * - :ref:`Standalone roofline analysis ` + - ``profile`` + - ``--name``, ``--roof-only``, ``--roofline-data-type ``, ``-- `` + + * - :ref:`Import a workload to database ` + - ``database`` + - ``--import``, ``--host``, ``--username``, ``--workload``, ``--team`` + + * - :ref:`Remove a workload from database ` + - ``database`` + - ``--remove``, ``--host``, ``--username``, ``--workload``, ``--team`` + + * - :doc:`Launch standalone GUI from CLI ` + - ``analyze`` + - ``--path``, ``--gui`` + + * - :doc:`Interact with profiling results from CLI ` + - ``analyze`` + - ``--path`` diff --git a/projects/rocprofiler-compute/docs/index.rst b/projects/rocprofiler-compute/docs/index.rst new file mode 100644 index 0000000000..60cfa5feb6 --- /dev/null +++ b/projects/rocprofiler-compute/docs/index.rst @@ -0,0 +1,88 @@ +.. meta:: + :description: ROCm Compute Profiler documentation and reference + :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD + +*********************************** +ROCm Compute Profiler documentation +*********************************** + +This documentation provides a comprehensive overview of the ROCm Compute +Profiler tool. In addition to a full deployment guide with installation +instructions, this documentation also explains the ideas motivating the design +behind the tool and its components. + +If you're new to ROCm Compute Profiler, familiarize yourself with the tool by reviewing the +chapters that follow and gradually learn its more advanced features. To get +started, see :doc:`What is ROCm Compute Profiler? `. + +ROCm Compute Profiler is open source and hosted at ``__. + +.. grid:: 2 + :gutter: 3 + + .. grid-item-card:: Install + + * :doc:`Installation and deployment ` + * :doc:`Grafana server for ROCm Compute Profiler ` + + .. 
grid-item:: + +Use the following topics to learn more about the advantages of ROCm Compute Profiler in your +development toolkit, how it aims to model performance, and how to use ROCm Compute Profiler +in practice. + +.. grid:: 2 + :gutter: 3 + + .. grid-item-card:: How to + + * :doc:`how-to/use` + + * :doc:`how-to/profile/mode` + + * :doc:`how-to/analyze/mode` + + * :doc:`how-to/analyze/cli` + + * :doc:`how-to/analyze/grafana-gui` + + * :doc:`how-to/analyze/standalone-gui` + + * :doc:`how-to/analyze/tui` + + .. grid-item-card:: Conceptual + + * :doc:`conceptual/performance-model` + + * :doc:`conceptual/compute-unit` + + * :doc:`conceptual/l2-cache` + + * :doc:`conceptual/shader-engine` + + * :doc:`conceptual/command-processor` + + * :doc:`conceptual/system-speed-of-light` + + * :doc:`conceptual/definitions` + + * :ref:`normalization-units` + + .. grid-item-card:: Tutorials + + * :doc:`tutorial/profiling-by-example` + + * :doc:`Learning resources ` + + .. grid-item-card:: Reference + + * :doc:`reference/compatible-accelerators` + + * :doc:`reference/faq` + +This project is proudly open source. For more details on how to contribute, +refer to +`Contributing to ROCm `_. + +Find ROCm licensing information on the +`Licensing `_ page. diff --git a/projects/rocprofiler-compute/docs/install/core-install.rst b/projects/rocprofiler-compute/docs/install/core-install.rst new file mode 100644 index 0000000000..87e8dd151f --- /dev/null +++ b/projects/rocprofiler-compute/docs/install/core-install.rst @@ -0,0 +1,260 @@ +.. meta:: + :description: ROCm Compute Profiler installation and deployment + :keywords: Omniperf, ROCm Compute Profiler, ROCm, tool, Instinct, accelerator, AMD, + install, deploy, Grafana, client, configuration, modulefiles + +********************************************** +Installing and deploying ROCm Compute Profiler +********************************************** + +ROCm Compute Profiler consists of two installation components. 
+ +* :ref:`ROCm Compute Profiler core installation ` (client-side) + + * Provides the core application profiling capability. + * Allows the collection of performance counters, filtering by hardware + block, dispatch, kernel, and more. + * Provides a CLI-based analysis mode. + * Provides a standalone web interface for importing analysis metrics. + +* :doc:`Grafana server for ROCm Compute Profiler ` (server-side) (*optional*) + + * Hosts the MongoDB backend and Grafana instance. + * Is packaged in a Docker container for easy setup. + +Determine what you need to install based on how you would like to interact with +ROCm Compute Profiler. See the following decision tree to help determine what installation is +right for you. + +.. image:: ../data/install/install-decision-tree.png + :align: center + :alt: Decision tree for installing and deploying ROCm Compute Profiler + :width: 800 + +.. _core-install: + +Core installation +================= + +The core ROCm Compute Profiler application requires the following basic software +dependencies. As of ROCm 6.2, the core ROCm Compute Profiler is included with your ROCm +installation. + +* Python ``>= 3.8`` +* CMake ``>= 3.19`` +* ROCm ``>= 5.7.1`` + +.. note:: + + ROCm Compute Profiler will use the first version of ``python3`` found in your system's + ``PATH``. If the default version of Python is older than 3.8, you may need to + update your system's ``PATH`` to point to a newer version. + +ROCm Compute Profiler depends on a number of Python packages documented in the top-level +``requirements.txt`` file. Install these *before* configuring ROCm Compute Profiler. + +.. tip:: + + If looking to build ROCm Compute Profiler as a developer, consider these additional + requirements. + + .. list-table:: + + * - ``docs/sphinx/requirements.txt`` + - Python packages required to build this documentation from source. + + * - ``requirements-test.txt`` + - Python packages required to run ROCm Compute Profiler's CI suite using PyTest. 
+ +The recommended procedure for ROCm Compute Profiler usage is to install into a shared file +system so that multiple users can access the final installation. The +following steps illustrate how to install the necessary Python dependencies +using `pip `_ and ROCm Compute Profiler into a +shared location controlled by the ``INSTALL_DIR`` environment variable. + +.. tip:: + + To always run ROCm Compute Profiler with a particular version of Python, you can create a + bash alias. For example, to run ROCm Compute Profiler with Python 3.10, you can run the + following command: + + .. code-block:: shell + + alias rocprof-compute-mypython="/usr/bin/python3.10 /opt/rocm/bin/rocprof-compute" + +.. _core-install-cmake-vars: + +Configuration variables +----------------------- +The following installation example leverages several +`CMake `_ project variables defined as +follows. + +.. list-table:: + :header-rows: 1 + + * - CMake variable + - Description + + * - ``CMAKE_INSTALL_PREFIX`` + - Controls the install path for ROCm Compute Profiler files. + + * - ``PYTHON_DEPS`` + - Specifies an optional path to resolve Python package dependencies. + + * - ``MOD_INSTALL_PATH`` + - Specifies an optional path for separate ROCm Compute Profiler modulefile installation. + +.. _core-install-steps: + +Install from source +------------------- + +#. A typical install begins by downloading the latest release tarball available + from ``__. From there, untar and + navigate into the top-level directory. + + .. + {{ config.version }} substitutes the ROCm Compute Profiler version in ../conf.py + + .. datatemplate:nodata:: + + .. code-block:: shell + + tar xfz rocprofiler-compute-v{{ config.version }}.tar.gz + cd rocprofiler-compute-v{{ config.version }} + +#. Next, install Python dependencies and complete the ROCm Compute Profiler configuration and + install process. + + .. datatemplate:nodata:: + + .. 
code-block:: shell + + # define top-level install path + export INSTALL_DIR= + + # install python deps + python3 -m pip install -t ${INSTALL_DIR}/python-libs -r requirements.txt + + # configure ROCm Compute Profiler for shared install + mkdir build + cd build + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/{{ config.version }} \ + -DPYTHON_DEPS=${INSTALL_DIR}/python-libs \ + -DMOD_INSTALL_PATH=${INSTALL_DIR}/modulefiles/rocprofiler-compute .. + + # install + make install + + .. tip:: + + You might need to ``sudo`` the final installation step if you don't have + write access for the chosen installation path. + +#. Upon successful installation, your top-level installation directory should + look like this. + + .. datatemplate:nodata:: + + .. code-block:: shell + + $ ls $INSTALL_DIR + modulefiles {{ config.version }} python-libs + +.. _core-install-modulefiles: + +Execution using modulefiles +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The installation process includes the creation of an environment modulefile for +use with `Lmod `_. On systems that support Lmod, +you can register the ROCm Compute Profiler modulefile directory and setup your environment +for execution of ROCm Compute Profiler as follows. + +.. datatemplate:nodata:: + + .. code-block:: shell + + $ module use $INSTALL_DIR/modulefiles + $ module load rocprofiler-compute + $ which rocprof-compute + /opt/apps/rocprofiler-compute/{{ config.version }}/bin/rocprof-compute + + $ rocprof-compute --version + ROC Profiler: /opt/rocm-5.1.0/bin/rocprof + + rocprofiler-compute (v{{ config.version }}) + +.. tip:: + + If you're relying on an Lmod Python module locally, you may wish to customize + the resulting ROCm Compute Profiler modulefile post-installation to include extra + module dependencies. + +Execution without modulefiles +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To use ROCm Compute Profiler without the companion modulefile, update your ``PATH`` +settings to enable access to the command line binary. 
If you installed Python +dependencies in a shared location, also update your ``PYTHONPATH`` +configuration. + +.. datatemplate:nodata:: + + .. code-block:: shell + + export PATH=$INSTALL_DIR/{{ config.version }}/bin:$PATH + export PYTHONPATH=$INSTALL_DIR/python-libs + +.. _core-install-package: + +Install via package manager +--------------------------- + +Once ROCm (minimum version 6.2.0) is installed, you can install ROCm Compute Profiler using +your operating system's native package manager using the following commands. +See :doc:`rocm-install-on-linux:index` for guidance on installing the ROCm +software stack. + +.. tab-set:: + + .. tab-item:: Ubuntu + + .. code-block:: shell + + $ sudo apt install rocprofiler-compute + # Include rocprofiler-compute in your system PATH + $ sudo update-alternatives --install /usr/bin/rocprof-compute rocprof-compute /opt/rocm/bin/rocprof-compute 0 + # Install Python dependencies + $ python3 -m pip install -r /opt/rocm/libexec/rocprofiler-compute/requirements.txt + + .. tab-item:: Red Hat Enterprise Linux + + .. code-block:: shell + + $ sudo dnf install rocprofiler-compute + # Include rocprofiler-compute in your system PATH + $ sudo update-alternatives --install /usr/bin/rocprof-compute rocprof-compute /opt/rocm/bin/rocprof-compute 0 + # Install Python dependencies + $ python3 -m pip install -r /opt/rocm/libexec/rocprofiler-compute/requirements.txt + + .. tab-item:: SUSE Linux Enterprise Server + + .. code-block:: shell + + $ sudo zypper install rocprofiler-compute + # Include rocprofiler-compute in your system PATH + $ sudo update-alternatives --install /usr/bin/rocprof-compute rocprof-compute /opt/rocm/bin/rocprof-compute 0 + # Install Python dependencies + $ python3 -m pip install -r /opt/rocm/libexec/rocprofiler-compute/requirements.txt + +.. _core-install-rocprof-var: + +ROCProfiler +----------- + +ROCm Compute Profiler relies on :doc:`ROCProfiler `'s ``rocprof`` binary +during the profiling process. 
Normally, the path to this binary is detected +automatically, but you can override the path by setting the optional +``ROCPROF`` environment variable. diff --git a/projects/rocprofiler-compute/docs/install/grafana-setup.rst b/projects/rocprofiler-compute/docs/install/grafana-setup.rst new file mode 100644 index 0000000000..3753081e01 --- /dev/null +++ b/projects/rocprofiler-compute/docs/install/grafana-setup.rst @@ -0,0 +1,219 @@ +.. meta:: + :description: ROCm Compute Profiler Grafana server installation and deployment + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, + install, deploy, Grafana, server, configuration, GUI + +*************************************************** +Setting up Grafana server for ROCm Compute Profiler +*************************************************** + +.. warning:: + + Grafana and MongoDB functionality is deprecated and will be removed in a future release. + +A Grafana server is *not required* to profile or analyze performance data +from the CLI. It's a supplementary mechanism to help you import performance +data and examine it in a detailed +`Grafana `_ dashboard GUI. + +Learn about installing and configuring the main ROCm Compute Profiler tool in +:ref:`core-install`. + +Setting up a Grafana instance for ROCm Compute Profiler requires the following basic software +dependencies. + +* `Docker Engine `_ + +The recommended process for enabling the server-side of ROCm Compute Profiler is to use the +provided ``Dockerfile`` to build the Grafana and MongoDB instance. + +.. _grafana-mongodb-setup: + +Set up Grafana and MongoDB +========================== + +Once you've decided where to host the Grafana and MongoDB instance, complete the +following setup instructions. + +Install MongoDB utilities +------------------------- + +ROCm Compute Profiler uses the +`mongoimport `_ +utility to upload data to your Grafana instance's backend database.
+ +Use the following commands to install MongoDB utilities for Ubuntu 20.04. + +.. code-block:: bash + + $ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb + $ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb + +.. note:: + + Find installation instructions for other distributions in + `MongoDB Database Tools Downloads `_. + +.. _grafana-persistent-storage-setup: + +Set up persistent storage +------------------------- + +Bind MongoDB to a directory on the host OS to create a local backup in case of a +crash or reset. This is called *creating a persistent volume*. + +.. code-block:: bash + + $ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/ + $ sudo mkdir -p grafana-storage mongodb + $ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/grafana-storage --opt o=bind grafana-storage + $ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/mongodb --opt o=bind grafana-mongo-db + +.. _grafana-docker-container: + +Build and launch the Docker container +------------------------------------- + +You're now ready to build your ``Dockerfile``. Navigate to your ROCm Compute Profiler install +directory to begin. + +.. code-block:: bash + + $ cd grafana + $ sudo docker-compose build + $ sudo docker-compose up -d + +.. note:: + + To troubleshoot Docker container build failures related to certificate verification, try + disabling any network proxy services on the host system. These proxy services can interfere + with OpenSSL's ability to retrieve a correct certificate chain when the container accesses + external websites. + +The TCP ports for Grafana (``4000``) and MongoDB (``27017``) in the Docker +container are mapped to ``14000`` and ``27018``, respectively, on the host side. + +.. tip:: + + In the event that either your Grafana or MongoDB instance crashes fatally, + just restart the server. 
Navigate to your install directory and run: + + .. code-block:: + + $ sudo docker-compose down + $ sudo docker-compose up -d + +.. _grafana-dashboard-setup: + +Set up the Grafana dashboard +---------------------------- + +Once you've launched your Docker container you should be able to reach Grafana +at ``http://:14000``. The default login credentials for your first-time +Grafana setup are: + +* **Username**: ``admin`` +* **Password**: ``admin`` + +.. figure:: ../data/install/grafana_welcome.png + :align: center + :alt: Grafana dashboard welcome screen + :width: 800 + + Grafana's welcome screen. + +.. _grafana-datasource-setup: + +Configure the MongoDB data source +--------------------------------- + +You must configure your MongoDB data source in Grafana before first-time use. +Navigate to Grafana's **Configuration** page to add the "Omniperf Data" +connection. + +.. figure:: ../data/install/datasource_config.jpg + :align: center + :alt: Grafana data source configuration + :width: 800 + + Grafana's Configuration page. + +Configure the following fields in the data source settings. + +.. list-table:: + :stub-columns: 1 + + * - HTTP URL + - ``http://localhost:3333`` + + * - MongoDB URL + - ``mongodb://temp:temp123@\:27018/admin?authSource=admin`` + + * - Database Name + - ``admin`` + +After configuring these fields, click **Save & test** to make sure your +connection is successful. + +.. figure:: ../data/install/datasource_settings.jpg + :align: center + :alt: Grafana data source settings + :width: 800 + + Grafana data source settings. + +.. note:: + + To avoid potential DNS issues, you might need to use the actual IP address + for the host node in the MongoDB URL. + +.. _grafana-import-dashboard-file: + +Import the ROCm Compute Profiler dashboard file +----------------------------------------------- + +From the **Create** → **Import** page, upload the dashboard file, +``/dashboards/Omniperf_v{__VERSION__}_pub.json`` from the +:doc:`ROCm Compute Profiler tarball `. 
+ +Edit both the dashboard **Name** and the **Unique identifier (UID)** fields to +uniquely identify the dashboard. Click **Import** to complete the process. + +.. figure:: ../data/install/import_dashboard.png + :align: center + :alt: Grafana's import dashboard + :width: 800 + + Grafana's Import dashboard. + +.. _grafana-select-workload: + +Select and load the ROCm Compute Profiler workload +-------------------------------------------------- + +Once you have imported a dashboard you're ready to begin. Start by browsing +available dashboards and selecting the dashboard you have just imported. + +.. figure:: ../data/install/opening_dashboard.png + :align: center + :alt: Opening your ROCm Compute Profiler dashboard in Grafana + :width: 800 + + Opening your ROCm Compute Profiler profiling dashboard in Grafana. + +Remember that you need to upload workload data to the MongoDB backend before +analyzing in your Grafana interface. See a detailed example of this in +:ref:`grafana-gui-import`. + +After a workload has been successfully uploaded, you should be able to select it +from the workload dropdown located at the top of your Grafana dashboard. + +.. figure:: ../data/install/grafana_workload_selection.png + :align: center + :alt: ROCm Compute Profiler workload selection in Grafana + :width: 800 + + Selecting your ROCm Compute Profiler workload in Grafana. + +For more information on how to use the Grafana interface for analysis see +:doc:`/how-to/analyze/grafana-gui`. diff --git a/projects/rocprofiler-compute/docs/license.rst b/projects/rocprofiler-compute/docs/license.rst new file mode 100644 index 0000000000..b64c897290 --- /dev/null +++ b/projects/rocprofiler-compute/docs/license.rst @@ -0,0 +1,10 @@ +.. meta:: + :description: ROCm Compute Profiler license + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD, + license + +******* +License +******* + +.. 
include:: ../LICENSE diff --git a/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst b/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst new file mode 100644 index 0000000000..3dd2548f97 --- /dev/null +++ b/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst @@ -0,0 +1,39 @@ +.. meta:: + :description: ROCm Compute Profiler support: compatible accelerators and GPUs + :keywords: Omniperf, compatible, cdna, gcn, gfx, rdna, radeon, hardware, architecture + +*********************** +Compatible accelerators +*********************** + +The following table lists SoCs (System on Chip) tested for compatibility with +ROCm Compute Profiler. See :doc:`rocm:reference/gpu-arch-specs` for full AMD accelerator and +GPU specifications. + +.. _def-soc: + +.. note:: + + In ROCm Compute Profiler documentation, the term System on Chip (SoC) refers to a + particular family of AMD accelerators. + +.. list-table:: + :header-rows: 1 + + * - Platform + - Status + + * - AMD Instinct™ MI350 + - Supported ✅ + + * - AMD Instinct™ MI300 + - Supported ✅ + + * - AMD Instinct MI200 + - Supported ✅ + + * - AMD Instinct MI100 + - Supported ✅ + + * - AMD Instinct MI50, MI60 (Vega 20) + - No support ❌ diff --git a/projects/rocprofiler-compute/docs/reference/faq.rst b/projects/rocprofiler-compute/docs/reference/faq.rst new file mode 100644 index 0000000000..2ec10debf3 --- /dev/null +++ b/projects/rocprofiler-compute/docs/reference/faq.rst @@ -0,0 +1,85 @@ +.. meta:: + :description: ROCm Compute Profiler FAQ and troubleshooting + :keywords: ROCm Compute Profiler, FAQ, troubleshooting, ROCm, profiler, tool, Instinct, + accelerator, AMD, SSH, error, version, workaround, help + +*** +FAQ +*** + +Frequently asked questions and troubleshooting tips. + +How do I export profiling data I have already generated using ROCm Compute Profiler? 
+==================================================================================== + +To interact with the Grafana GUI, you must sync data with the MongoDB +backend. You can do this using :ref:`database ` mode. + +Pass in the directory of your desired workload as follows. + +.. code-block:: shell + + $ rocprof-compute database --import -w -H -u -t + +python ast error: 'Constant' object has no attribute 'kind' +=========================================================== + +This error arises from a bug in the default ``astunparse 1.6.3`` with +``python 3.8``. The error doesn't seem to occur with Python 3.7 or 3.9. + +Workaround: + +.. code-block:: shell + + $ pip3 uninstall astunparse + $ pip3 install astunparse + +tabulate doesn't print properly +=============================== + +To get around this issue, set the following environment variables to update your +locale settings. + +.. code-block:: shell + + $ export LC_ALL=C.UTF-8 + $ export LANG=C.UTF-8 + +How can I SSH tunnel in MobaXterm? +================================== + +1. Open MobaXterm. +2. In the top ribbon, select **Tunneling** to access tunneling options. + + .. image:: ../data/faq/tunnel_demo1.png + :align: center + :alt: MobaXterm Tunnel button + :width: 800 + + This pop-up should appear. + + .. image:: ../data/faq/tunnel_demo2.png + :align: center + :alt: MobaXterm pop-up + :width: 800 + +3. Select **New SSH tunnel**. + + .. image:: ../data/faq/tunnel_demo3.png + :align: center + :alt: MobaXterm pop-up + :width: 800 + +4. Configure the SSH tunnel.
+ + Local clients + * ````: ``[PORT]`` + + Remote server + * ````: ``localhost`` + * ````: ``[PORT]`` + + SSH server + * ````: *name of the server to connect to* + * ````: *username to login to the server* + * ````: ``22`` diff --git a/projects/rocprofiler-compute/docs/sphinx/_toc.yml.in b/projects/rocprofiler-compute/docs/sphinx/_toc.yml.in new file mode 100644 index 0000000000..483bf1c0f5 --- /dev/null +++ b/projects/rocprofiler-compute/docs/sphinx/_toc.yml.in @@ -0,0 +1,64 @@ +# Anywhere {branch} is used, the branch name will be substituted. +# These comments will also be removed. +defaults: + numbered: False + maxdepth: 6 +root: index +subtrees: + - entries: + - file: what-is-rocprof-compute.rst + + - caption: Install + entries: + - file: install/core-install.rst + title: Installation and deployment + - file: install/grafana-setup.rst + title: Grafana server setup + + - caption: How to + entries: + - file: how-to/use.rst + - file: how-to/pc_sampling.rst + title: Use PC sampling + - file: how-to/profile/mode.rst + - file: how-to/analyze/mode.rst + entries: + - file: how-to/analyze/cli.rst + - file: how-to/analyze/grafana-gui.rst + - file: how-to/analyze/standalone-gui.rst + - file: how-to/analyze/tui.rst + + - caption: Conceptual + entries: + - file: conceptual/performance-model.rst + entries: + - file: conceptual/compute-unit.rst + title: Compute unit + entries: + - file: conceptual/pipeline-descriptions.rst + - file: conceptual/pipeline-metrics.rst + - file: conceptual/local-data-share.rst + title: Local data share + - file: conceptual/vector-l1-cache.rst + title: Vector L1 cache + - file: conceptual/l2-cache.rst + title: L2 cache + - file: conceptual/shader-engine.rst + title: Shader engine + - file: conceptual/command-processor.rst + title: Command processor + - file: conceptual/system-speed-of-light.rst + title: System Speed-of-Light + - file: conceptual/references.rst + - file: conceptual/definitions.rst + + - caption: Tutorials + entries: + - file: 
tutorial/profiling-by-example.rst + - file: tutorial/learning-resources.rst + + - caption: Reference + entries: + - file: reference/compatible-accelerators.rst + - file: reference/faq.rst + - file: license.rst diff --git a/projects/rocprofiler-compute/docs/sphinx/requirements.in b/projects/rocprofiler-compute/docs/sphinx/requirements.in new file mode 100644 index 0000000000..fd7e1ddb15 --- /dev/null +++ b/projects/rocprofiler-compute/docs/sphinx/requirements.in @@ -0,0 +1,2 @@ +rocm-docs-core==1.21.1 +sphinxcontrib.datatemplates==0.11.0 diff --git a/projects/rocprofiler-compute/docs/sphinx/requirements.txt b/projects/rocprofiler-compute/docs/sphinx/requirements.txt new file mode 100644 index 0000000000..aa1a564d9a --- /dev/null +++ b/projects/rocprofiler-compute/docs/sphinx/requirements.txt @@ -0,0 +1,286 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile requirements.in +# +accessible-pygments==0.0.5 + # via pydata-sphinx-theme +alabaster==1.0.0 + # via sphinx +asttokens==3.0.0 + # via stack-data +attrs==24.3.0 + # via + # jsonschema + # jupyter-cache + # referencing +babel==2.16.0 + # via + # pydata-sphinx-theme + # sphinx +beautifulsoup4==4.12.3 + # via pydata-sphinx-theme +breathe==4.35.0 + # via rocm-docs-core +certifi==2024.12.14 + # via requests +cffi==1.17.1 + # via + # cryptography + # pynacl +charset-normalizer==3.4.1 + # via requests +click==8.1.8 + # via + # jupyter-cache + # sphinx-external-toc +comm==0.2.2 + # via ipykernel +cryptography==44.0.0 + # via pyjwt +debugpy==1.8.12 + # via ipykernel +decorator==5.1.1 + # via ipython +defusedxml==0.7.1 + # via sphinxcontrib-datatemplates +deprecated==1.2.15 + # via pygithub +docutils==0.21.2 + # via + # breathe + # myst-parser + # pydata-sphinx-theme + # sphinx +exceptiongroup==1.2.2 + # via ipython +executing==2.2.0 + # via stack-data +fastjsonschema==2.21.1 + # via + # nbformat + # rocm-docs-core +gitdb==4.0.12 + # via gitpython 
+gitpython==3.1.44 + # via rocm-docs-core +greenlet==3.1.1 + # via sqlalchemy +idna==3.10 + # via requests +imagesize==1.4.1 + # via sphinx +importlib-metadata==8.6.1 + # via + # jupyter-cache + # myst-nb +ipykernel==6.29.5 + # via myst-nb +ipython==8.31.0 + # via + # ipykernel + # myst-nb +jedi==0.19.2 + # via ipython +jinja2==3.1.5 + # via + # myst-parser + # sphinx +jsonschema==4.23.0 + # via nbformat +jsonschema-specifications==2024.10.1 + # via jsonschema +jupyter-cache==1.0.1 + # via myst-nb +jupyter-client==8.6.3 + # via + # ipykernel + # nbclient +jupyter-core==5.7.2 + # via + # ipykernel + # jupyter-client + # nbclient + # nbformat +markdown-it-py==3.0.0 + # via + # mdit-py-plugins + # myst-parser +markupsafe==3.0.2 + # via jinja2 +matplotlib-inline==0.1.7 + # via + # ipykernel + # ipython +mdit-py-plugins==0.4.2 + # via myst-parser +mdurl==0.1.2 + # via markdown-it-py +myst-nb==1.1.2 + # via rocm-docs-core +myst-parser==4.0.0 + # via myst-nb +nbclient==0.10.2 + # via + # jupyter-cache + # myst-nb +nbformat==5.10.4 + # via + # jupyter-cache + # myst-nb + # nbclient +nest-asyncio==1.6.0 + # via ipykernel +packaging==24.2 + # via + # ipykernel + # sphinx +parso==0.8.4 + # via jedi +pexpect==4.9.0 + # via ipython +platformdirs==4.3.6 + # via jupyter-core +prompt-toolkit==3.0.50 + # via ipython +psutil==6.1.1 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.3 + # via stack-data +pycparser==2.22 + # via cffi +pydata-sphinx-theme==0.16.1 + # via + # rocm-docs-core + # sphinx-book-theme +pygithub==2.5.0 + # via rocm-docs-core +pygments==2.19.1 + # via + # accessible-pygments + # ipython + # pydata-sphinx-theme + # sphinx +pyjwt[crypto]==2.10.1 + # via pygithub +pynacl==1.5.0 + # via pygithub +python-dateutil==2.9.0.post0 + # via jupyter-client +pyyaml==6.0.2 + # via + # jupyter-cache + # myst-nb + # myst-parser + # rocm-docs-core + # sphinx-external-toc + # sphinxcontrib-datatemplates +pyzmq==26.2.0 + # via + # ipykernel + # jupyter-client 
+referencing==0.36.1 + # via + # jsonschema + # jsonschema-specifications +requests==2.32.3 + # via + # pygithub + # sphinx +rocm-docs-core==1.21.1 + # via -r requirements.in +rpds-py==0.22.3 + # via + # jsonschema + # referencing +six==1.17.0 + # via python-dateutil +smmap==5.0.2 + # via gitdb +snowballstemmer==2.2.0 + # via sphinx +soupsieve==2.6 + # via beautifulsoup4 +sphinx==8.1.3 + # via + # breathe + # myst-nb + # myst-parser + # pydata-sphinx-theme + # rocm-docs-core + # sphinx-book-theme + # sphinx-copybutton + # sphinx-design + # sphinx-external-toc + # sphinx-notfound-page + # sphinxcontrib-datatemplates + # sphinxcontrib-runcmd +sphinx-book-theme==1.1.3 + # via rocm-docs-core +sphinx-copybutton==0.5.2 + # via rocm-docs-core +sphinx-design==0.6.1 + # via rocm-docs-core +sphinx-external-toc==1.0.1 + # via rocm-docs-core +sphinx-notfound-page==1.0.4 + # via rocm-docs-core +sphinxcontrib-applehelp==2.0.0 + # via sphinx +sphinxcontrib-datatemplates==0.11.0 + # via -r requirements.in +sphinxcontrib-devhelp==2.0.0 + # via sphinx +sphinxcontrib-htmlhelp==2.1.0 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==2.0.0 + # via sphinx +sphinxcontrib-runcmd==0.2.0 + # via sphinxcontrib-datatemplates +sphinxcontrib-serializinghtml==2.0.0 + # via sphinx +sqlalchemy==2.0.37 + # via jupyter-cache +stack-data==0.6.3 + # via ipython +tabulate==0.9.0 + # via jupyter-cache +tomli==2.2.1 + # via sphinx +tornado==6.4.2 + # via + # ipykernel + # jupyter-client +traitlets==5.14.3 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbformat +typing-extensions==4.12.2 + # via + # ipython + # myst-nb + # pydata-sphinx-theme + # pygithub + # referencing + # sqlalchemy +urllib3==2.3.0 + # via + # pygithub + # requests +wcwidth==0.2.13 + # via prompt-toolkit +wrapt==1.17.2 + # via deprecated +zipp==3.21.0 + # via importlib-metadata diff --git 
a/projects/rocprofiler-compute/docs/sphinx/static/css/o_custom.css b/projects/rocprofiler-compute/docs/sphinx/static/css/o_custom.css new file mode 100644 index 0000000000..b4fe010b59 --- /dev/null +++ b/projects/rocprofiler-compute/docs/sphinx/static/css/o_custom.css @@ -0,0 +1,8 @@ +/* Override PyData Sphinx Theme default colors */ +html[data-theme='light'] { + --pst-color-table-row-hover-bg: #E2E8F0; +} + +html[data-theme='dark'] { + --pst-color-table-row-hover-bg: #1E293B; +} diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/infinity-fabric-transactions.rst b/projects/rocprofiler-compute/docs/tutorial/includes/infinity-fabric-transactions.rst new file mode 100644 index 0000000000..9e26e59bdf --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/includes/infinity-fabric-transactions.rst @@ -0,0 +1,675 @@ +.. _infinity-fabric-example: + +Infinity Fabric transactions +============================ + + For this example, consider the + :dev-sample:`Infinity Fabric™ sample ` distributed as a part of + ROCm Compute Profiler. + +The following code snippet launches a simple read-only kernel. + +.. code-block:: cpp + + // the main streaming kernel + __global__ void kernel(int* x, size_t N, int zero) { + int sum = 0; + const size_t offset_start = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = 0; i < 10; ++i) { + for (size_t offset = offset_start; offset < N; offset += blockDim.x * gridDim.x) { + sum += x[offset]; + } + } + if (sum != 0) { + x[offset_start] = sum; + } + } + +This happens twice -- once as a warm-up and once for analysis. Note that the +buffer ``x`` is initialized to all zeros via a call to ``hipMemcpy`` on the +host before the kernel is ever launched. Therefore, the following conditional +is identically false -- and thus we expect no writes. + +.. code-block:: cpp + + if (sum != 0) { ... + +.. 
note:: + + The actual sample included with ROCm Compute Profiler also includes the ability to select + different operation types (such as atomics, writes). This abbreviated version + is presented here for reference only. + +Finally, this sample code lets the user control the +:ref:`granularity of an allocation `, the owner of an allocation +(local HBM, CPU DRAM or remote HBM), and the size of an allocation (the default +is :math:`\sim4`\ GiB) via command line arguments. In doing so, we can explore +the impact of these parameters on the L2-Fabric metrics reported by ROCm Compute Profiler to +further understand their meaning. + +.. note:: + + All results in this section were generated on a node of Infinity + Fabric connected MI250 accelerators using ROCm version 5.6.0, and ROCm Compute Profiler + version 2.0.0. Although results may vary with ROCm versions and accelerator + connectivity, we expect the lessons learned here to be broadly applicable. + +.. _infinity-fabric-ex1: + +Experiment 1: Coarse-grained, accelerator-local HBM reads +----------------------------------------------------------- + +In our first experiment, we consider the simplest possible case, a +``hipMalloc``\ ’d buffer that is local to our current accelerator: + +.. code-block:: shell-session + + $ rocprof-compute profile -n coarse_grained_local --no-roof -- ./fabric -t 1 -o 0 + Using: + mtype:CoarseGrained + mowner:Device + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/coarse_grained_local/mi200 -b 17.2.0 17.2.1 17.2.2 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 + <...> + 17. 
L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ + │ 17.2.0 │ L2-Fabric Read BW │ 42947428672.00 │ 42947428672.00 │ 42947428672.00 │ Bytes per kernel │ + ├─────────┼─────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.1 │ HBM Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼─────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.2 │ Remote Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧═════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.07 │ 0.07 │ 0.07 │ Pct │ + ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit 
│ + ╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ + │ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.1 │ Read (Uncached) │ 1450.00 │ 1450.00 │ 1450.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.2 │ Read (64B) │ 671053573.00 │ 671053573.00 │ 671053573.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.3 │ HBM Read │ 671053565.00 │ 671053565.00 │ 671053565.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.4 │ Remote Read │ 8.00 │ 8.00 │ 8.00 │ Req per kernel │ + ╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ + +Here, you can make the following observations. + +- The vast majority of L2-Fabric requests (>99%) are 64B + read requests (**17.5.2**). + +- Nearly 100% of the read requests (**17.2.1**) are homed in on the + accelerator-local HBM (**17.5.3**), while some small fraction of these reads are + routed to a “remote” device (**17.5.4**). + +- These drive a :math:`\sim40`\ GiB per kernel read-bandwidth (**17.2.0**). + +In addition, we see a small amount of :ref:`uncached ` reads +(**17.5.1**), these correspond to things like: + +* The assembly code to execute the kernel + +* Kernel arguments + +* Coordinate parameters (such as ``blockDim.z``) that were not initialized by the + hardware, etc. and may account for some of our "remote" read requests + (**17.5.4**), for example, reading from CPU DRAM + +The above list is not exhaustive, nor are all of these guaranteed to be +"uncached" – the exact implementation depends on the accelerator and +ROCm versions used. 
These read requests could be interrogated further in +the :ref:`Scalar L1 Data Cache ` and +:ref:`Instruction Cache ` metric sections. + +.. note:: + + The Traffic metrics in Sec **17.2** are presented as a percentage of the total + number of requests. For example, "HBM Read Traffic" is the percent of read requests + (**17.5.0** - **17.5.2**) that were directed to the accelerators' local HBM (**17.5.3**). + +.. _infinity-fabric-ex2: + +Experiment 2: Fine-grained, accelerator-local HBM reads +--------------------------------------------------------- + +In this experiment, we change the :ref:`granularity ` of our +device-allocation to be fine-grained device memory, local to the current +accelerator. Our code uses the ``hipExtMallocWithFlag`` API with the +``hipDeviceMallocFinegrained`` flag to accomplish this. + +.. note:: + + On some systems (such as those with only PCIe® connected accelerators), you need + to set the environment variable ``HSA_FORCE_FINE_GRAIN_PCIE=1`` to enable + this memory type. + +.. code-block:: shell-session + + $ rocprof-compute profile -n fine_grained_local --no-roof -- ./fabric -t 0 -o 0 + Using: + mtype:FineGrained + mowner:Device + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/fine_grained_local/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 + <...> + 17. 
L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ + │ 17.2.0 │ L2-Fabric Read BW │ 42948661824.00 │ 42948661824.00 │ 42948661824.00 │ Bytes per kernel │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.1 │ HBM Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.2 │ Remote Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.3 │ Uncached Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.07 │ 0.07 │ 0.07 │ Pct │ + ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 
17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ + │ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.1 │ Read (Uncached) │ 1334.00 │ 1334.00 │ 1334.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.2 │ Read (64B) │ 671072841.00 │ 671072841.00 │ 671072841.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.3 │ HBM Read │ 671072835.00 │ 671072835.00 │ 671072835.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.4 │ Remote Read │ 6.00 │ 6.00 │ 6.00 │ Req per kernel │ + ╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ + +Comparing with our :ref:`previous example `, we see a +relatively similar result, namely: + +- The vast majority of L2-Fabric requests are 64B read requests (**17.5.2**) + +- Nearly all these read requests are directed to the accelerator-local HBM (**17.2.1**) + +In addition, we now see a small percentage of HBM Read Stalls (**17.4.2**), +as streaming fine-grained memory is putting more stress on Infinity +Fabric. + +.. note:: + + The stalls in Sec 17.4 are presented as a percentage of the total number of + active L2 cycles, summed over :doc:`all L2 channels `. + +.. _infinity-fabric-ex3: + +Experiment 3: Fine-grained, remote-accelerator HBM reads +---------------------------------------------------------- + +In this experiment, we move our :ref:`fine-grained ` allocation to +be owned by a remote accelerator. 
We accomplish this by first changing +the HIP device using, for instance, the ``hipSetDevice(1)`` API, then allocating +fine-grained memory (as described :ref:`previously `), and +finally resetting the device back to the default, for instance, +``hipSetDevice(0)``. + +Although we have not changed our code significantly, we do see a +substantial change in the L2-Fabric metrics: + +.. code-block:: shell-session + + $ rocprof-compute profile -n fine_grained_remote --no-roof -- ./fabric -t 0 -o 2 + Using: + mtype:FineGrained + mowner:Remote + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/fine_grained_remote/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 + <...> + 17. L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ + │ 17.2.0 │ L2-Fabric Read BW │ 42949692736.00 │ 42949692736.00 │ 42949692736.00 │ Bytes per kernel │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.1 │ HBM Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.2 │ Remote Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.3 │ Uncached Read Traffic │ 200.00 │ 200.00 │ 200.00 │ Pct │ + ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ 
Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 17.85 │ 17.85 │ 17.85 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════╤═══════════════╤═══════════════╤═══════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╡ + │ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.1 │ Read (Uncached) │ 1342177894.00 │ 1342177894.00 │ 1342177894.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.2 │ Read (64B) │ 671088949.00 │ 671088949.00 │ 671088949.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.3 │ HBM Read │ 307.00 │ 307.00 │ 307.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.4 │ Remote Read │ 671088642.00 │ 671088642.00 │ 671088642.00 │ Req per kernel │ + ╘═════════╧═════════════════╧═══════════════╧═══════════════╧═══════════════╧════════════════╛ + +First, 
we see that while we still observe approximately the same number +of 64B Read Requests (**17.5.2**), we now see an even larger number of +Uncached Read Requests (**17.5.1**). Some simple division reveals: + +.. math:: + + 1342177894.00 / 671088949.00 \approx 2 + +That is, each 64B Read Request is *also* counted as two Uncached Read +Requests, as reflected in the :ref:`request-flow diagram `. +This is also why the Uncached Read Traffic metric (**17.2.3**) is at the +counter-intuitive value of 200%! + +In addition, observe that: + +- We no longer see any significant number of HBM Read Requests (**17.2.1**, + **17.5.3**), nor HBM Read Stalls (**17.4.2**), but instead, + +- we see that almost all of these requests are considered “remote” + (**17.2.2**, **17.5.4**) and are being routed to another + accelerator, or the CPU — in this case HIP Device 1 — and, + +- we see a significantly larger percentage of AMD Infinity Fabric Read Stalls + (**17.4.1**) as compared to the HBM Read Stalls in the + :ref:`previous example `. + +These stalls correspond to reads that are going out over the AMD +Infinity Fabric connection to another MI250 accelerator. In +addition, because these are crossing between accelerators, we expect +significantly lower achievable bandwidths as compared to the local +accelerator’s HBM – this is reflected (indirectly) in the magnitude of +the stall metric (**17.4.1**). Finally, we note that if our system contained +only PCIe connected accelerators, these observations would differ. + +.. _infinity-fabric-ex4: + +Experiment 4: Fine-grained, CPU-DRAM reads +-------------------------------------------- + +In this experiment, we move our :ref:`fine-grained ` allocation to +be owned by the CPU’s DRAM. We accomplish this by allocating host-pinned +fine-grained memory using the ``hipHostMalloc`` API: + +.. 
code-block:: shell-session + + $ rocprof-compute profile -n fine_grained_host --no-roof -- ./fabric -t 0 -o 1 + Using: + mtype:FineGrained + mowner:Host + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/fine_grained_host/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 + <...> + 17. L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ + │ 17.2.0 │ L2-Fabric Read BW │ 42949691264.00 │ 42949691264.00 │ 42949691264.00 │ Bytes per kernel │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.1 │ HBM Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.2 │ Remote Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.3 │ Uncached Read Traffic │ 200.00 │ 200.00 │ 200.00 │ Pct │ + ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 91.29 │ 91.29 │ 91.29 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 
17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════╤═══════════════╤═══════════════╤═══════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╡ + │ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.1 │ Read (Uncached) │ 1342177848.00 │ 1342177848.00 │ 1342177848.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.2 │ Read (64B) │ 671088926.00 │ 671088926.00 │ 671088926.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.3 │ HBM Read │ 284.00 │ 284.00 │ 284.00 │ Req per kernel │ + ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤ + │ 17.5.4 │ Remote Read │ 671088642.00 │ 671088642.00 │ 671088642.00 │ Req per kernel │ + ╘═════════╧═════════════════╧═══════════════╧═══════════════╧═══════════════╧════════════════╛ + +Here we see *almost* the same results as in the +:ref:`previous experiment `, however now as we are crossing +a PCIe bus to the CPU, we see that the Infinity Fabric Read stalls (**17.4.1**) +have shifted to be a PCIe stall (**17.4.0**). 
In addition, as (on this +system) the PCIe bus has a lower peak bandwidth than the AMD Infinity +Fabric connection between two accelerators, we once again observe an +increase in the percentage of stalls on this interface. + +.. note:: + + Had we performed this same experiment on an + `MI250X system `_, + these transactions would again have been marked as Infinity Fabric Read + stalls (**17.4.1**), as the CPU is connected to the accelerator via AMD Infinity + Fabric. + +.. _infinity-fabric-ex5: + +Experiment 5: Coarse-grained, CPU-DRAM reads +---------------------------------------------- + +In our next fabric experiment, we change our CPU memory allocation to be +:ref:`coarse-grained `. We accomplish this by passing the +``hipHostMalloc`` API the ``hipHostMallocNonCoherent`` flag, to mark the +allocation as coarse-grained: + +.. code-block:: shell-session + + $ rocprof-compute profile -n coarse_grained_host --no-roof -- ./fabric -t 1 -o 1 + Using: + mtype:CoarseGrained + mowner:Host + mspace:Global + mop:Read + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/coarse_grained_host/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2 + <...> + 17. 
L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ + │ 17.2.0 │ L2-Fabric Read BW │ 42949691264.00 │ 42949691264.00 │ 42949691264.00 │ Bytes per kernel │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.1 │ HBM Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.2 │ Remote Read Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.3 │ Uncached Read Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.0 │ Read - PCIe Stall │ PCIe Stall │ Read │ 91.27 │ 91.27 │ 91.27 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.1 │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.2 │ Read - HBM Stall │ HBM Stall │ Read │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 
17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ + │ 17.5.0 │ Read (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.1 │ Read (Uncached) │ 562.00 │ 562.00 │ 562.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.2 │ Read (64B) │ 671088926.00 │ 671088926.00 │ 671088926.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.3 │ HBM Read │ 281.00 │ 281.00 │ 281.00 │ Req per kernel │ + ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.4 │ Remote Read │ 671088645.00 │ 671088645.00 │ 671088645.00 │ Req per kernel │ + ╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ + +Here we see a similar result to our +:ref:`previous experiment `, with one key difference: our +accesses are no longer marked as Uncached Read requests (**17.2.3, 17.5.1**), but instead +are 64B read requests (**17.5.2**), as observed in our +:ref:`Coarse-grained, accelerator-local HBM ` experiment. + +.. _infinity-fabric-ex6: + +Experiment 6: Fine-grained, CPU-DRAM writes +-------------------------------------------- + +Thus far in our exploration of the L2-Fabric interface, we have +primarily focused on read operations. However, in +:ref:`our request flow diagram `, we note that writes are +counted separately. To observe this, we use the ``-p`` flag to trigger write +operations to fine-grained memory allocated on the host: + +.. 
code-block:: shell-session + + $ rocprof-compute profile -n fine_grained_host_write --no-roof -- ./fabric -t 0 -o 1 -p 1 + Using: + mtype:FineGrained + mowner:Host + mspace:Global + mop:Write + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/fine_grained_host_write/mi200 -b 17.2.4 17.2.5 17.2.6 17.2.7 17.2.8 17.4.3 17.4.4 17.4.5 17.4.6 17.5.5 17.5.6 17.5.7 17.5.8 17.5.9 17.5.10 -n per_kernel --dispatch 2 + <...> + 17. L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═══════════════════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡ + │ 17.2.4 │ L2-Fabric Write and Atomic BW │ 42949672960.00 │ 42949672960.00 │ 42949672960.00 │ Bytes per kernel │ + ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.5 │ HBM Write and Atomic Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.6 │ Remote Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.7 │ Atomic Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤ + │ 17.2.8 │ Uncached Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ╘═════════╧═══════════════════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤════════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max 
│ Unit │ + ╞═════════╪════════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.3 │ Write - PCIe Stall │ PCIe Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.4 │ Write - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.5 │ Write - HBM Stall │ HBM Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.6 │ Write - Credit Starvation │ Credit Starvation │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧════════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡ + │ 17.5.5 │ Write (32B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.6 │ Write (Uncached) │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.7 │ Write (64B) │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.8 │ HBM Write and Atomic │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + 
├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.9 │ Remote Write and Atomic │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤ + │ 17.5.10 │ Atomic │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ╘═════════╧═════════════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛ + +Here we notice a few changes in our request pattern: + +* As expected, the requests have changed from 64B Reads to 64B Write requests + (**17.5.7**), + +* these requests are homed in on a “remote” destination (**17.2.6, 17.5.9**), as + expected, and + +* these are also counted as a single Uncached Write request (**17.5.6**). + +In addition, there are rather significant changes in the bandwidth values +reported: + +- The “L2-Fabric Write and Atomic” bandwidth metric (**17.2.4**) + reports about 40GiB of data written across Infinity Fabric while + +- The “Remote Write and Atomic Traffic” metric (**17.2.6**) indicates that nearly + 100% of these requests are being directed to a remote destination. + +The precise meaning of these metrics is explored in the +:ref:`subsequent experiment `. + +Finally, we note that we see no write stalls on the PCIe bus +(**17.4.3**). This is because writes over a PCIe bus `are +posted `_, +that is, they do not require acknowledgement. + +.. _infinity-fabric-ex7: + +Experiment 7: Fine-grained, CPU-DRAM atomicAdd +------------------------------------------------ + +Next, we change our experiment to instead target ``atomicAdd`` +operations to the CPU’s DRAM. + +..
code-block:: shell-session + + $ rocprof-compute profile -n fine_grained_host_add --no-roof -- ./fabric -t 0 -o 1 -p 2 + Using: + mtype:FineGrained + mowner:Host + mspace:Global + mop:Add + mdata:Unsigned + remoteId:-1 + <...> + $ rocprof-compute analyze -p workloads/fine_grained_host_add/mi200 -b 17.2.4 17.2.5 17.2.6 17.2.7 17.2.8 17.4.3 17.4.4 17.4.5 17.4.6 17.5.5 17.5.6 17.5.7 17.5.8 17.5.9 17.5.10 -n per_kernel --dispatch 2 + <...> + 17. L2 Cache + 17.2 L2 - Fabric Transactions + ╒═════════╤═══════════════════════════════════╤══════════════╤══════════════╤══════════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════════════════╪══════════════╪══════════════╪══════════════╪══════════════════╡ + │ 17.2.4 │ L2-Fabric Write and Atomic BW │ 429496736.00 │ 429496736.00 │ 429496736.00 │ Bytes per kernel │ + ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ + │ 17.2.5 │ HBM Write and Atomic Traffic │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ + │ 17.2.6 │ Remote Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ + │ 17.2.7 │ Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤ + │ 17.2.8 │ Uncached Write and Atomic Traffic │ 100.00 │ 100.00 │ 100.00 │ Pct │ + ╘═════════╧═══════════════════════════════════╧══════════════╧══════════════╧══════════════╧══════════════════╛ + 17.4 L2 - Fabric Interface Stalls + ╒═════════╤════════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Type │ Transaction │ Avg │ Min │ Max │ Unit │ + 
╞═════════╪════════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡ + │ 17.4.3 │ Write - PCIe Stall │ PCIe Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.4 │ Write - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.5 │ Write - HBM Stall │ HBM Stall │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤ + │ 17.4.6 │ Write - Credit Starvation │ Credit Starvation │ Write │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ╘═════════╧════════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛ + 17.5 L2 - Fabric Detailed Transaction Breakdown + ╒═════════╤═════════════════════════╤═════════════╤═════════════╤═════════════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════════╪═════════════╪═════════════╪═════════════╪════════════════╡ + │ 17.5.5 │ Write (32B) │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ + │ 17.5.6 │ Write (Uncached) │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ + │ 17.5.7 │ Write (64B) │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ + │ 17.5.8 │ HBM Write and Atomic │ 0.00 │ 0.00 │ 0.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ + │ 17.5.9 │ Remote Write 
and Atomic │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ + ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤ + │ 17.5.10 │ Atomic │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │ + ╘═════════╧═════════════════════════╧═════════════╧═════════════╧═════════════╧════════════════╛ + +In this case, there is quite a lot to unpack: + +- For the first time, the 32B Write requests (**17.5.5**) are heavily used. + +- These correspond to Atomic requests (**17.2.7, 17.5.10**), and are counted as + Uncached Writes (**17.5.6**). + +- The L2-Fabric Write and Atomic bandwidth metric (**17.2.4**) shows about 0.4 + GiB of traffic. For convenience, the sample reduces the default problem size + for this case due to the speed of atomics across a PCIe bus, and finally, + +- The traffic is directed to a remote device (**17.2.6, 17.5.9**). + +Let's consider what an “atomic” request means in this context. Recall +that we are discussing memory traffic flowing from the L2 cache, the +device-wide coherence point on current CDNA accelerators such as the +MI250, to for example, the CPU’s DRAM. In this light, we see that these +requests correspond to *system scope* atomics, and specifically in the +case of the MI250, to fine-grained memory. + + +.. rubric:: Disclaimer + +PCIe® is a registered trademark of PCI-SIG Corporation. + +.. + `Leave as possible future experiment to add + + + ### Experiment #2 - Non-temporal writes + + If we take the same code (for convenience only) as previously described, we can demonstrate how to achieve 'streaming' writes, as described in the [L2 Cache Access metrics](L2_cache_metrics) section. 
+ To see this, we use the Clang built-in [`__builtin_nontemporal_store`](https://clang.llvm.org/docs/LanguageExtensions.html#non-temporal-load-store-builtins), for example + + ``` + template + __device__ void store (T* ptr, T val) { + __builtin_nontemporal_store(val, ptr); + } + ``` + + On an AMD MI2XX accelerator, for FP32 values this will generate a `global_store_dword` instruction, with the `glc` and `slc` bits set, described in [section 10.1](https://developer.amd.com/wp-content/resources/CDNA2_Shader_ISA_4February2022.pdf) of the CDNA2 ISA guide.` diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/instructions-per-cycle-and-utilizations.rst b/projects/rocprofiler-compute/docs/tutorial/includes/instructions-per-cycle-and-utilizations.rst new file mode 100644 index 0000000000..c9efe85025 --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/includes/instructions-per-cycle-and-utilizations.rst @@ -0,0 +1,486 @@ +.. _ipc-example: + +Instructions-per-cycle and utilizations example +=============================================== + +For this example, consider the +:dev-sample:`instructions-per-cycle (IPC) example ` included with +ROCm Compute Profiler. + +This example is compiled using ``c++17`` support: + +.. code-block:: shell + + $ hipcc -O3 ipc.hip -o ipc -std=c++17 + +and was run on an MI250 CDNA2 accelerator: + +.. code-block:: shell + + $ rocprof-compute profile -n ipc --no-roof -- ./ipc + +The results shown in this section are *generally* applicable to CDNA +accelerators, but may vary between generations and specific products. + +.. _ipc-experiment-design-note: + +Design note +----------- + +The kernels in this example all execute a specific assembly operation +``N`` times (1000, by default), for instance the ``vmov`` kernel: + +.. 
code-block:: cpp + + template + __device__ void vmov_op() { + int dummy; + if constexpr (N >= 1) { + asm volatile("v_mov_b32 v0, v1\n" : : "{v31}"(dummy)); + vmov_op(); + } + } + + template + __global__ void vmov() { + vmov_op(); + } + +The kernels are then launched twice, once for a warm-up run, and once +for measurement. + +.. _ipc-valu-utilization: + +VALU utilization and IPC +------------------------ + +Now we can use our test to measure the achieved instructions-per-cycle +of various types of instructions. We start with a simple :ref:`VALU ` +operation, i.e., a ``v_mov_b32`` instruction, e.g.: + +.. code-block:: asm + + v_mov_b32 v0, v1 + +This instruction simply copies the contents from the source register +(``v1``) to the destination register (``v0``). Investigating this kernel +with ROCm Compute Profiler, we see: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/ipc/mi200/ --dispatch 7 -b 11.2 + <...> + -------------------------------------------------------------------------------- + 0. Top Stat + ╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ + │ 0 │ void vmov<1000>() [clone .kd] │ 1.00 │ 99317423.00 │ 99317423.00 │ 99317423.00 │ 100.00 │ + ╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 11. 
Compute Units - Compute Pipeline + 11.2 Pipeline Stats + ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ + │ 11.2.0 │ IPC │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.2 │ SALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.3 │ VALU Util │ 99.98 │ 99.98 │ 99.98 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.5 │ Branch Util │ 0.1 │ 0.1 │ 0.1 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.6 │ VALU Active Threads │ 64.0 │ 64.0 │ 64.0 │ Threads │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ + ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ + +Here we see that: + +1. Both the IPC (**11.2.0**) and “Issued” IPC (**11.2.1**) metrics are + :math:`\sim 1` +2. The VALU Utilization metric (**11.2.3**) is also :math:`\sim100\%`, and + finally +3. The VALU Active Threads metric (**11.2.6**) is 64, i.e., the wavefront + size on CDNA accelerators, as all threads in the wavefront are + active. + +We will explore the difference between the IPC (**11.2.0**) and “Issued” IPC +(**11.2.1**) metrics in the :ref:`next section `. + +Additionally, we notice a small (0.1%) Branch utilization (**11.2.5**).
+Inspecting the assembly of this kernel shows there are no branch +operations, however recalling the note in the :ref:`Pipeline +statistics ` section: + + The branch utilization <…> includes time spent in other instruction + types (namely: ``s_endpgm``) that are *typically* a very small + percentage of the overall kernel execution. + +We see that this is coming from execution of the ``s_endpgm`` +instruction at the end of every wavefront. + +.. note:: + + Technically, the cycle counts used in the denominators of our IPC metrics are + actually in units of quad-cycles, a group of 4 consecutive cycles. However, a + typical :ref:`VALU ` instruction on CDNA accelerators runs for a + single quad-cycle (see :gcn-crash-course:`30`). Therefore, for simplicity, we + simply report these metrics as "instructions per cycle". + +.. _issued-ipc: + +Exploring “issued” IPC via MFMA operations +------------------------------------------ + +.. warning:: + + The MFMA assembly operations used in this example are inherently not portable + to older CDNA architectures. + +Unlike the simple quad-cycle ``v_mov_b32`` operation discussed in our +:ref:`previous example `, some operations take many +quad-cycles to execute. For example, using the +`AMD Matrix Instruction Calculator `_ +we can see that some :ref:`MFMA ` operations take 64 cycles, e.g.: + +.. code-block:: shell + + $ ./matrix_calculator.py --arch CDNA2 --detail-instruction --instruction v_mfma_f32_32x32x8bf16_1k + Architecture: CDNA2 + Instruction: V_MFMA_F32_32X32X8BF16_1K + <...> + Execution statistics: + FLOPs: 16384 + Execution cycles: 64 + FLOPs/CU/cycle: 1024 + Can co-execute with VALU: True + VALU co-execution cycles possible: 60 + +What happens to our IPC when we utilize this ``v_mfma_f32_32x32x8bf16_1k`` +instruction on a CDNA2 accelerator? To find out, we turn to our ``mfma`` kernel +in the IPC example: + +.. 
code-block:: shell + + $ rocprof-compute analyze -p workloads/ipc/mi200/ --dispatch 8 -b 11.2 --decimal 4 + <...> + -------------------------------------------------------------------------------- + 0. Top Stat + ╒════╤═══════════════════════════════╤═════════╤═════════════════╤═════════════════╤═════════════════╤══════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═══════════════════════════════╪═════════╪═════════════════╪═════════════════╪═════════════════╪══════════╡ + │ 0 │ void mfma<1000>() [clone .kd] │ 1.0000 │ 1623167595.0000 │ 1623167595.0000 │ 1623167595.0000 │ 100.0000 │ + ╘════╧═══════════════════════════════╧═════════╧═════════════════╧═════════════════╧═════════════════╧══════════╛ + + + -------------------------------------------------------------------------------- + 11. Compute Units - Compute Pipeline + 11.2 Pipeline Stats + ╒═════════╤═════════════════════╤═════════╤═════════╤═════════╤══════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════╪═════════╪═════════╪═════════╪══════════════╡ + │ 11.2.0 │ IPC │ 0.0626 │ 0.0626 │ 0.0626 │ Instr/cycle │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.1 │ IPC (Issued) │ 1.0000 │ 1.0000 │ 1.0000 │ Instr/cycle │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.2 │ SALU Util │ 0.0000 │ 0.0000 │ 0.0000 │ Pct │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.3 │ VALU Util │ 6.2496 │ 6.2496 │ 6.2496 │ Pct │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.4 │ VMEM Util │ 0.0000 │ 0.0000 │ 0.0000 │ Pct │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.5 │ Branch Util │ 0.0062 │ 0.0062 │ 0.0062 │ Pct │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.6 │ VALU Active Threads │ 64.0000 │ 64.0000 │ 
64.0000 │ Threads │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.7 │ MFMA Util │ 99.9939 │ 99.9939 │ 99.9939 │ Pct │ + ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤ + │ 11.2.8 │ MFMA Instr Cycles │ 64.0000 │ 64.0000 │ 64.0000 │ Cycles/instr │ + ╘═════════╧═════════════════════╧═════════╧═════════╧═════════╧══════════════╛ + +In contrast to our :ref:`VALU IPC example `, we now see +that the IPC metric (**11.2.0**) and Issued IPC (**11.2.1**) metric differ +substantially. First, we see the VALU utilization (**11.2.3**) has decreased +substantially, from nearly 100% to :math:`\sim6.25\%`. We note that this matches +the ratio of: :math:`((Execution\ cycles) - (VALU\ coexecution\ cycles)) / (Execution\ cycles)` +reported by the matrix calculator, while the MFMA utilization (**11.2.7**) +has increased to nearly 100%. + +Recall that our ``v_mfma_f32_32x32x8bf16_1k`` instruction takes 64 cycles to +execute, or 16 quad-cycles, matching our observed MFMA Instruction +Cycles (**11.2.8**). That is, we have a single instruction executed every 16 +quad-cycles, or :math:`1/16 = 0.0625`, which is almost identical to our IPC +metric (**11.2.0**). Why then is the Issued IPC metric (**11.2.1**) equal to 1.0? + +Instead of simply counting the number of instructions issued and +dividing by the number of cycles the :doc:`CUs ` on +the accelerator were active (as is done for **11.2.0**), this metric is formulated +differently, and instead counts the number of +(non-:ref:`internal `) instructions issued divided +by the number of (quad-) cycles where the :ref:`scheduler ` was +actively working on issuing instructions. 
Thus the Issued IPC metric +(**11.2.1**) gives more of a sense of “what percent of the total number of +:ref:`scheduler ` cycles did a wave schedule an instruction?” +while the IPC metric (**11.2.0**) indicates the ratio of the number of +instructions executed over the total +:ref:`active CU cycles `. + +.. warning:: + + There are further complications of the Issued IPC metric (**11.2.1**) that make + its use more complicated. We will explore these in the + :ref:`following section `. For these reasons, + ROCm Compute Profiler typically promotes use of the regular IPC metric (**11.2.0**), e.g., in + the top-level Speed-of-Light chart. + +.. _ipc-internal-instructions: + +Internal instructions and IPC +----------------------------- + +Next, we explore the concept of an “internal” instruction. From +:gcn-crash-course:`29`, we see a few candidates for internal instructions, and +we choose a ``s_nop`` instruction, which according to the +:mi200-isa-pdf:`CDNA2 ISA guide <>`: + + Does nothing; it can be repeated in hardware up to eight times. + +Here we choose to use the following no-op to make our point: + +.. code-block:: asm + + s_nop 0x0 + +Running this kernel through ROCm Compute Profiler yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/ipc/mi200/ --dispatch 9 -b 11.2 + <...> + -------------------------------------------------------------------------------- + 0. Top Stat + ╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ + │ 0 │ void snop<1000>() [clone .kd] │ 1.00 │ 14221851.50 │ 14221851.50 │ 14221851.50 │ 100.00 │ + ╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 11.
Compute Units - Compute Pipeline + 11.2 Pipeline Stats + ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ + │ 11.2.0 │ IPC │ 6.79 │ 6.79 │ 6.79 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.2 │ SALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.3 │ VALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.5 │ Branch Util │ 0.68 │ 0.68 │ 0.68 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.6 │ VALU Active Threads │ │ │ │ Threads │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ + ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ + +First, we see that the IPC metric (**11.2.0**) tops our theoretical maximum +of 5 instructions per cycle (discussed in the :ref:`scheduler ` +section). How can this be? + +Recall that :gcn-crash-course:`27` say “no functional unit” for the internal +instructions. This removes the limitation on the IPC. If we are *only* +issuing internal instructions, we are not issuing to any execution +units! However, workloads such as these are almost *entirely* artificial +(that is, repeatedly issuing internal instructions almost exclusively). 
In +practice, a maximum IPC of 5 is expected in almost all cases. + +Secondly, note that our “Issued” IPC (**11.2.1**) is still equal to +one here. Again, this has to do with the details of “internal” +instructions. Recall in our :ref:`previous example ` we defined +this metric as explicitly excluding internal instruction counts. The +logical question then is, "what *is* this metric counting in our +``s_nop`` kernel?" + +The generated assembly looks something like: + +.. code-block:: asm + + ;;#ASMSTART + s_nop 0x0 + ;;#ASMEND + ;;#ASMSTART + s_nop 0x0 + ;;#ASMEND + ;;<... omitting many more ...> + s_endpgm + .section .rodata,#alloc + .p2align 6, 0x0 + .amdhsa_kernel _Z4snopILi1000EEvv + +Of particular interest here is the ``s_endpgm`` instruction, of which +the `CDNA2 ISA +guide `__ +states: + + End of program; terminate wavefront. + +This is not on our list of internal instructions from +:gcn-crash-course:`The AMD GCN Architecture <>`, and is therefore counted as part +of our Issued IPC (**11.2.1**). Thus, the issued IPC being equal to one here +indicates that we issued an ``s_endpgm`` instruction every cycle the +:ref:`scheduler ` was active for non-internal instructions, which +is expected as this was our *only* non-internal instruction. + +SALU Utilization +---------------- + +Next, we explore a simple :ref:`SALU ` kernel in our on-going IPC and +utilization example. For this case, we select a simple scalar move +operation, for instance: + +.. code-block:: asm + + s_mov_b32 s0, s1 + +which, in analogy to our :ref:`v_mov ` example, copies the +contents of the source scalar register (``s1``) to the destination +scalar register (``s0``). Running this kernel through ROCm Compute Profiler yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/ipc/mi200/ --dispatch 10 -b 11.2 + <...> + -------------------------------------------------------------------------------- + 0.
Top Stat + ╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ + │ 0 │ void smov<1000>() [clone .kd] │ 1.00 │ 96246554.00 │ 96246554.00 │ 96246554.00 │ 100.00 │ + ╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 11. Compute Units - Compute Pipeline + 11.2 Pipeline Stats + ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ + │ 11.2.0 │ IPC │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.2 │ SALU Util │ 99.98 │ 99.98 │ 99.98 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.3 │ VALU Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.5 │ Branch Util │ 0.1 │ 0.1 │ 0.1 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.6 │ VALU Active Threads │ │ │ │ Threads │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ + ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ + +Here we see that: + +- 
Both our IPC (**11.2.0**) and Issued IPC (**11.2.1**) are + :math:`\sim1.0` as expected, and + +- The SALU Utilization (**11.2.2**) was + nearly 100% as it was active for almost the entire kernel. + +VALU Active Threads +------------------- + +For our final IPC/Utilization example, we consider a slight modification +of our :ref:`v_mov ` example: + +.. code-block:: cpp + + template + __global__ void vmov_with_divergence() { + if (threadIdx.x % 64 == 0) + vmov_op(); + } + +That is, we wrap our :ref:`VALU ` operation inside a conditional +where only one lane in our wavefront is active. Running this kernel +through ROCm Compute Profiler yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/ipc/mi200/ --dispatch 11 -b 11.2 + <...> + -------------------------------------------------------------------------------- + 0. Top Stat + ╒════╤══════════════════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪══════════════════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡ + │ 0 │ void vmov_with_divergence<1000>() [clone │ 1.00 │ 97125097.00 │ 97125097.00 │ 97125097.00 │ 100.00 │ + │ │ .kd] │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 11. 
Compute Units - Compute Pipeline + 11.2 Pipeline Stats + ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡ + │ 11.2.0 │ IPC │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.1 │ IPC (Issued) │ 1.0 │ 1.0 │ 1.0 │ Instr/cycle │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.2 │ SALU Util │ 0.1 │ 0.1 │ 0.1 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.3 │ VALU Util │ 99.98 │ 99.98 │ 99.98 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.4 │ VMEM Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.5 │ Branch Util │ 0.2 │ 0.2 │ 0.2 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.6 │ VALU Active Threads │ 1.13 │ 1.13 │ 1.13 │ Threads │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.7 │ MFMA Util │ 0.0 │ 0.0 │ 0.0 │ Pct │ + ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤ + │ 11.2.8 │ MFMA Instr Cycles │ │ │ │ Cycles/instr │ + ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛ + +Here we see that once again, our VALU Utilization (**11.2.3**) is nearly +100%. However, we note that the VALU Active Threads metric (**11.2.6**) is +:math:`\sim 1`, which matches our conditional in the source code. So +VALU Active Threads reports the average number of lanes of our wavefront +that are active over all :ref:`VALU ` instructions, or thread +“convergence” (i.e., 1 - :ref:`divergence `). + +.. note:: + + 1. 
The act of evaluating a vector conditional in this example typically triggers VALU operations, contributing to why the VALU Active Threads metric is not identically one. + 2. This metric is a time (cycle) averaged value, and thus contains an implicit dependence on the duration of various VALU instructions. + + Nonetheless, this metric serves as a useful measure of thread-convergence. + +Finally, we note that our branch utilization (**11.2.5**) has increased +slightly from our baseline, as we now have a branch (checking the value +of ``threadIdx.x``). diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/lds-examples.rst b/projects/rocprofiler-compute/docs/tutorial/includes/lds-examples.rst new file mode 100644 index 0000000000..8d1b7b1a98 --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/includes/lds-examples.rst @@ -0,0 +1,272 @@ +.. _lds-examples: + +LDS examples +============ + +For this example, consider the +:dev-sample:`LDS sample ` distributed as a part of ROCm Compute Profiler. This +code contains two kernels to explore how both :doc:`LDS ` bandwidth and +bank conflicts are calculated in ROCm Compute Profiler. + +This example was compiled and run on an MI250 accelerator using ROCm +v5.6.0, and ROCm Compute Profiler v2.0.0. + +.. code-block:: shell-session + + $ hipcc -O3 lds.hip -o lds + +Finally, we generate our ``rocprof-compute profile`` as: + +.. code-block:: shell-session + + $ rocprof-compute profile -n lds --no-roof -- ./lds + +.. _lds-bandwidth: + +LDS bandwidth +------------- + +To explore our *theoretical LDS bandwidth* metric, we use a simple +kernel: + +.. 
code-block:: cpp + + constexpr unsigned max_threads = 256; + __global__ void load(int* out, int flag) { + __shared__ int array[max_threads]; + int index = threadIdx.x; + // fake a store to the LDS array to avoid unwanted behavior + if (flag) + array[max_threads - index] = index; + __syncthreads(); + int x = array[index]; + if (x == int(-1234567)) + out[threadIdx.x] = x; + } + +Here we: + +* Create an array of 256 integers in :doc:`LDS ` + +* Fake a write to the LDS using the ``flag`` variable (always set to zero on the + host) to avoid dead-code elimination + +* Read a single integer per work-item from ``threadIdx.x`` of the LDS array + +* If the integer is equal to a magic number (always false), write the value out + to global memory to again, avoid dead-code elimination + +Finally, we launch this kernel repeatedly, varying the number of threads +in our workgroup: + +.. code-block:: cpp + + void bandwidth_demo(int N) { + for (int i = 1; i <= N; ++i) + load<<<1,i>>>(nullptr, 0); + hipDeviceSynchronize(); + } + +Next, let’s analyze the first of our bandwidth kernel dispatches: + +.. code-block:: shell + + $ rocprof-compute analyze -p workloads/lds/mi200/ -b 12.2.1 --dispatch 0 -n per_kernel + <...> + 12. Local Data Share (LDS) + 12.2 LDS Stats + ╒═════════╤═══════════════════════╤════════╤════════╤════════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪════════╪════════╪════════╪══════════════════╡ + │ 12.2.1 │ Theoretical Bandwidth │ 256.00 │ 256.00 │ 256.00 │ Bytes per kernel │ + ╘═════════╧═══════════════════════╧════════╧════════╧════════╧══════════════════╛ + +Here we see that our Theoretical Bandwidth metric (**12.2.1**) is reporting +256 Bytes were loaded even though we launched a single work-item +workgroup, and thus only loaded a single integer from LDS. Why is this? 
+ +Recall our definition of this metric: + + Indicates the maximum amount of bytes that could have been loaded + from/stored to/atomically updated in the LDS per + :ref:`normalization unit `. + +Here we see that this instruction *could* have loaded up to 256 bytes of +data (4 bytes for each work-item in the wavefront), and therefore this +is the expected value for this metric in ROCm Compute Profiler, hence why this metric +is named the “theoretical” bandwidth. + +To further illustrate this point we plot the relationship of the +theoretical bandwidth metric (**12.2.1**) as compared to the effective (or +achieved) bandwidth of this kernel, varying the number of work-items +launched from 1 to 256: + +.. figure:: ../data/profiling-by-example/ldsbandwidth.png + :align: center + :alt: Comparison of effective bandwidth versus the theoretical bandwidth + metric in ROCm Compute Profiler for our simple example. + :width: 800 + + Comparison of effective bandwidth versus the theoretical bandwidth + metric in ROCm Compute Profiler for our simple example. + +Here we see that the theoretical bandwidth metric follows a step-function. It +increases only when another wavefront issues an LDS instruction for up to 256 +bytes of data. Such increases are marked in the plot using dashed lines. In +contrast, the effective bandwidth increases linearly, by 4 bytes, with the +number of work-items in the kernel, N. + +.. _lds-bank-conflicts: + +Bank conflicts +-------------- + +Next we explore bank conflicts using a slight modification of our bandwidth +kernel: + +.. 
code-block:: cpp + + constexpr unsigned nbanks = 32; + __global__ void conflicts(int* out, int flag) { + constexpr unsigned nelements = nbanks * max_threads; + __shared__ int array[nelements]; + // each thread reads from the same bank + int index = threadIdx.x * nbanks; + // fake a store to the LDS array to avoid unwanted behavior + if (flag) + array[max_threads - index] = index; + __syncthreads(); + int x = array[index]; + if (x == int(-1234567)) + out[threadIdx.x] = x; + } + +Here we: + +* Allocate an :doc:`LDS ` array of size + :math:`32*256*4{B}=32{KiB}` + +* Fake a write to the LDS using the ``flag`` + variable (always set to zero on the host) to avoid dead-code elimination + +* Read a single integer per work-item from index + ``threadIdx.x * nbanks`` of the LDS array + +* If the integer is equal to a + magic number (always false), write the value out to global memory to, + again, avoid dead-code elimination. + +On the host, we again repeatedly launch this kernel, varying the number +of work-items: + +.. code-block:: cpp + + void conflicts_demo(int N) { + for (int i = 1; i <= N; ++i) + conflicts<<<1,i>>>(nullptr, 0); + hipDeviceSynchronize(); + } + +Analyzing our first ``conflicts`` kernel (i.e., a single work-item), we +see: + +.. code-block:: shell + + $ rocprof-compute analyze -p workloads/lds/mi200/ -b 12.2.4 12.2.6 --dispatch 256 -n per_kernel + <...> + -------------------------------------------------------------------------------- + 12. 
Local Data Share (LDS) + 12.2 LDS Stats + ╒═════════╤════════════════╤═══════╤═══════╤═══════╤═══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════╪═══════╪═══════╪═══════╪═══════════════════╡ + │ 12.2.4 │ Index Accesses │ 2.00 │ 2.00 │ 2.00 │ Cycles per kernel │ + ├─────────┼────────────────┼───────┼───────┼───────┼───────────────────┤ + │ 12.2.6 │ Bank Conflict │ 0.00 │ 0.00 │ 0.00 │ Cycles per kernel │ + ╘═════════╧════════════════╧═══════╧═══════╧═══════╧═══════════════════╛ + +In our :ref:`previous example `, we showed how a load +from a single work-item is considered to have a theoretical bandwidth of +256B. Recall, the :doc:`LDS ` can load up to :math:`128B` per +cycle (i.e., 32 banks x 4B / bank / cycle). Hence, we see that loading a 4B +integer spends two cycles accessing the LDS +(:math:`2\ {cycle} = (256B) / (128\ B/{cycle})`). + +Looking at the next ``conflicts`` dispatch (i.e., two work-items) yields: + +.. code-block:: shell + + $ rocprof-compute analyze -p workloads/lds/mi200/ -b 12.2.4 12.2.6 --dispatch 257 -n per_kernel + <...> + -------------------------------------------------------------------------------- + 12. Local Data Share (LDS) + 12.2 LDS Stats + ╒═════════╤════════════════╤═══════╤═══════╤═══════╤═══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════╪═══════╪═══════╪═══════╪═══════════════════╡ + │ 12.2.4 │ Index Accesses │ 3.00 │ 3.00 │ 3.00 │ Cycles per kernel │ + ├─────────┼────────────────┼───────┼───────┼───────┼───────────────────┤ + │ 12.2.6 │ Bank Conflict │ 1.00 │ 1.00 │ 1.00 │ Cycles per kernel │ + ╘═════════╧════════════════╧═══════╧═══════╧═══════╧═══════════════════╛ + +Here we see a bank conflict! What happened? + +Recall that the index for each thread was calculated as: + +.. code-block:: cpp + + int index = threadIdx.x * nbanks; + +Or, precisely 32 elements, and each element is 4B wide (for a standard +integer). 
That is, each thread strides back to the same bank in the LDS, +such that each work-item we add to the dispatch results in another bank +conflict! + +Recalling our discussion of bank conflicts in our +:doc:`LDS ` description: + +A bank conflict occurs when two (or more) work-items in a wavefront +want to read, write, or atomically update different addresses that +map to the same bank in the same cycle. In this case, the conflict +detection hardware will determine a new schedule such that the +access is split into multiple cycles with no conflicts in any +single cycle. + +Here we see the conflict resolution hardware in action! Because we have +engineered our kernel to generate conflicts, we expect our bank conflict +metric to scale linearly with the number of work-items: + +.. figure:: ../data/profiling-by-example/ldsconflicts.png + :align: center + :alt: Comparison of LDS conflict cycles versus access cycles for our simple + example. + :width: 800 + + Comparison of LDS conflict cycles versus access cycles for our simple + example. + +Here we show the comparison of the Index Accesses (**12.2.4**), to the Bank +Conflicts (**12.2.6**) for the first 20 kernel invocations. We see that each grows +linearly, and there is a constant gap of 2 cycles between them (i.e., the first +access is never considered a conflict). + +Finally, we can use these two metrics to derive the Bank Conflict Rate (**12.1.4**). +Since within an Index Access we have 32 banks that may need to be updated, we +use: + +$$ +Bank\ Conflict\ Rate = 100 * ((Bank\ Conflicts / 32) / (Index\ Accesses - Bank\ Conflicts)) +$$ + +Plotting this, we see: + +.. figure:: ../data/profiling-by-example/ldsconflictrate.png + :align: center + :alt: LDS bank conflict rate example + :width: 800 + + LDS Bank Conflict rate for our simple example. + +The bank conflict rate linearly increases with the number of work-items +within a wavefront that are active, *approaching* 100%, but never quite +reaching it. 
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/occupancy-limiters-example.rst b/projects/rocprofiler-compute/docs/tutorial/includes/occupancy-limiters-example.rst new file mode 100644 index 0000000000..dcbd6a61b1 --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/includes/occupancy-limiters-example.rst @@ -0,0 +1,456 @@ +.. _occupancy-example: + +Occupancy limiters example +========================== + +For this example, consider the +:dev-sample:`occupancy ` included with ROCm Compute Profiler. We will +investigate the use of the resource allocation panel in the +:ref:`Workgroup Manager `’s metrics section to determine occupancy +limiters. This code contains several kernels to explore how various +kernel resources impact achieved occupancy, and how this is reported in +ROCm Compute Profiler. + +This example was compiled and run on an MI250 accelerator using ROCm +v5.6.0, and ROCm Compute Profiler v2.0.0: + +.. code-block:: shell + + $ hipcc -O3 occupancy.hip -o occupancy --save-temps + +We have again included the ``--save-temps`` flag to get the +corresponding assembly. + +Finally, we generate our ROCm Compute Profiler profile as: + +.. code-block:: shell + + $ rocprof-compute profile -n occupancy --no-roof -- ./occupancy + +.. _occupancy-experiment-design: + +Design note +----------- + +For our occupancy test, we need to create a kernel that is resource +heavy, in various ways. For this purpose, we use the following (somewhat +funny-looking) kernel: + +.. 
code-block:: cpp + + constexpr int bound = 16; + __launch_bounds__(256) + __global__ void vgprbound(int N, double* ptr) { + double intermediates[bound]; + for (int i = 0 ; i < bound; ++i) intermediates[i] = N * threadIdx.x; + double x = ptr[threadIdx.x]; + for (int i = 0; i < 100; ++i) { + x += sin(pow(__shfl(x, i % warpSize) * intermediates[(i - 1) % bound], intermediates[i % bound])); + intermediates[i % bound] = x; + } + if (x == N) ptr[threadIdx.x] = x; + } + +Here we try to use as many :ref:`VGPRs ` as possible, to this end: + +* We create a small array of double precision floats, that we size to try + to fit into registers (i.e., ``bound``, this may need to be tuned + depending on the ROCm version). + +* We specify ``__launch_bounds__(256)`` + to increase the number of VGPRs available to the kernel (by limiting the + number of wavefronts that can be resident on a + :doc:`CU `). + +* Write a unique non-compile time constant to each element of the array. + +* Repeatedly permute and call relatively expensive math functions on our + array elements. + +* Keep the compiler from optimizing out any operations by faking a write to the + ``ptr`` based on a run-time conditional. + +This yields a total of 122 VGPRs, but it is expected this number will +depend on the exact ROCm/compiler version. + +.. code-block:: asm + + .size _Z9vgprboundiPd, .Lfunc_end1-_Z9vgprboundiPd + ; -- End function + .section .AMDGPU.csdata + ; Kernel info: + ; codeLenInByte = 4732 + ; NumSgprs: 68 + ; NumVgprs: 122 + ; NumAgprs: 0 + ; <...> + ; AccumOffset: 124 + +We will use various permutations of this kernel to limit occupancy, and +more importantly for the purposes of this example, demonstrate how this +is reported in ROCm Compute Profiler. + +.. _vgpr-occupancy: + +VGPR limited +------------ + +For our first test, we use the ``vgprbound`` kernel discussed in the +:ref:`design note `. After profiling, we run +the analyze step on this kernel: + +.. 
code-block:: shell + + $ rocprof-compute analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 --dispatch 1 + <...> + -------------------------------------------------------------------------------- + 0. Top Stat + ╒════╤═════════════════════════╤═════════╤══════════════╤══════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═════════════════════════╪═════════╪══════════════╪══════════════╪══════════════╪════════╡ + │ 0 │ vgprbound(int, double*) │ 1.00 │ 923093822.50 │ 923093822.50 │ 923093822.50 │ 100.00 │ + ╘════╧═════════════════════════╧═════════╧══════════════╧══════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 2. System Speed-of-Light + 2.1 Speed-of-Light + ╒═════════╤═════════════════════╤═════════╤════════════╤═════════╤═══════════════╕ + │ Index │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ + ╞═════════╪═════════════════════╪═════════╪════════════╪═════════╪═══════════════╡ + │ 2.1.15 │ Wavefront Occupancy │ 1661.24 │ Wavefronts │ 3328.00 │ 49.92 │ + ╘═════════╧═════════════════════╧═════════╧════════════╧═════════╧═══════════════╛ + + + -------------------------------------------------------------------------------- + 6. 
Workgroup Manager (SPI) + 6.2 Workgroup Manager - Resource Allocation + ╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡ + │ 6.2.0 │ Not-scheduled Rate (Workgroup Manager) │ 0.64 │ 0.64 │ 0.64 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.1 │ Not-scheduled Rate (Scheduler-Pipe) │ 24.94 │ 24.94 │ 24.94 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.2 │ Scheduler-Pipe Stall Rate │ 24.49 │ 24.49 │ 24.49 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.3 │ Scratch Stall Rate │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.4 │ Insufficient SIMD Waveslots │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.5 │ Insufficient SIMD VGPRs │ 94.90 │ 94.90 │ 94.90 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.6 │ Insufficient SIMD SGPRs │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.7 │ Insufficient CU LDS │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.8 │ Insufficient CU Barriers │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.9 │ Reached CU Workgroup Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.10 │ Reached CU Wavefront Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ + 
╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛ + + + -------------------------------------------------------------------------------- + 7. Wavefront + 7.1 Wavefront Launch Stats + ╒═════════╤══════════╤════════╤════════╤════════╤═══════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪══════════╪════════╪════════╪════════╪═══════════╡ + │ 7.1.5 │ VGPRs │ 124.00 │ 124.00 │ 124.00 │ Registers │ + ├─────────┼──────────┼────────┼────────┼────────┼───────────┤ + │ 7.1.6 │ AGPRs │ 4.00 │ 4.00 │ 4.00 │ Registers │ + ├─────────┼──────────┼────────┼────────┼────────┼───────────┤ + │ 7.1.7 │ SGPRs │ 80.00 │ 80.00 │ 80.00 │ Registers │ + ╘═════════╧══════════╧════════╧════════╧════════╧═══════════╛ + +Here we see that the kernel indeed does use *around* (but not exactly) +122 VGPRs, with the difference due to granularity of VGPR allocations. +In addition, we see that we have allocated 4 “:ref:`AGPRs `”. We +note that on current CDNA2 accelerators, the ``AccumOffset`` field of +the assembly metadata: + +.. code-block:: asm + + ; AccumOffset: 124 + +denotes the divide between ``VGPRs`` and ``AGPRs``. + +Next, we examine our wavefront occupancy (**2.1.15**), and see that we are +reaching only :math:`\sim50\%` of peak occupancy. As a result, we see +that: + +- We are not scheduling workgroups :math:`\sim25\%` of + :ref:`total scheduler-pipe cycles ` (**6.2.1**); recall + from the discussion of the `workgroup manager `, 25% is the maximum. + +- The scheduler-pipe is stalled (**6.2.2**) from scheduling workgroups due to + resource constraints for the same :math:`\sim25\%` of the time. + +- And finally, :math:`\sim95\%` of those stalls are due to a lack of SIMDs + with the appropriate number of VGPRs available (**6.2.5**). + +That is, the reason we can’t reach full occupancy is due to our VGPR +usage, as expected! + +LDS limited +----------- + +To examine an LDS limited example, we must change our kernel slightly: + +.. 
code-block:: cpp + + constexpr size_t fully_allocate_lds = 64ul * 1024ul / sizeof(double); + __launch_bounds__(256) + __global__ void ldsbound(int N, double* ptr) { + __shared__ double intermediates[fully_allocate_lds]; + for (int i = threadIdx.x ; i < fully_allocate_lds; i += blockDim.x) intermediates[i] = N * threadIdx.x; + __syncthreads(); + double x = ptr[threadIdx.x]; + for (int i = threadIdx.x; i < fully_allocate_lds; i += blockDim.x) { + x += sin(pow(__shfl(x, i % warpSize) * intermediates[(i - 1) % fully_allocate_lds], intermediates[i % fully_allocate_lds])); + __syncthreads(); + intermediates[i % fully_allocate_lds] = x; + } + if (x == N) ptr[threadIdx.x] = x; + } + +Where we now: + +* Allocate a 64 KiB LDS array per workgroup, and + +* Use our allocated LDS array instead of a register array + +Analyzing this: + +.. code-block:: shell + + $ rocprof-compute analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 7.1.8 --dispatch 3 + <...> + -------------------------------------------------------------------------------- + 2. System Speed-of-Light + 2.1 Speed-of-Light + ╒═════════╤═════════════════════╤════════╤════════════╤═════════╤═══════════════╕ + │ Index │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ + ╞═════════╪═════════════════════╪════════╪════════════╪═════════╪═══════════════╡ + │ 2.1.15 │ Wavefront Occupancy │ 415.52 │ Wavefronts │ 3328.00 │ 12.49 │ + ╘═════════╧═════════════════════╧════════╧════════════╧═════════╧═══════════════╛ + + + -------------------------------------------------------------------------------- + 6. 
Workgroup Manager (SPI) + 6.2 Workgroup Manager - Resource Allocation + ╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡ + │ 6.2.0 │ Not-scheduled Rate (Workgroup Manager) │ 0.13 │ 0.13 │ 0.13 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.1 │ Not-scheduled Rate (Scheduler-Pipe) │ 24.87 │ 24.87 │ 24.87 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.2 │ Scheduler-Pipe Stall Rate │ 24.84 │ 24.84 │ 24.84 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.3 │ Scratch Stall Rate │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.4 │ Insufficient SIMD Waveslots │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.5 │ Insufficient SIMD VGPRs │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.6 │ Insufficient SIMD SGPRs │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.7 │ Insufficient CU LDS │ 96.47 │ 96.47 │ 96.47 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.8 │ Insufficient CU Barriers │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.9 │ Reached CU Workgroup Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.10 │ Reached CU Wavefront Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ + 
╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛ + + + -------------------------------------------------------------------------------- + 7. Wavefront + 7.1 Wavefront Launch Stats + ╒═════════╤════════════════╤══════════╤══════════╤══════════╤═══════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════╪══════════╪══════════╪══════════╪═══════════╡ + │ 7.1.5 │ VGPRs │ 96.00 │ 96.00 │ 96.00 │ Registers │ + ├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤ + │ 7.1.6 │ AGPRs │ 0.00 │ 0.00 │ 0.00 │ Registers │ + ├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤ + │ 7.1.7 │ SGPRs │ 80.00 │ 80.00 │ 80.00 │ Registers │ + ├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤ + │ 7.1.8 │ LDS Allocation │ 65536.00 │ 65536.00 │ 65536.00 │ Bytes │ + ╘═════════╧════════════════╧══════════╧══════════╧══════════╧═══════════╛ + +We see that our VGPR allocation has gone down to 96 registers, but now +we see our 64KiB LDS allocation (**7.1.8**). In addition, we see a similar +non-schedule rate (**6.2.1**) and stall rate (**6.2.2**) as in our +:ref:`VGPR example `. However, our occupancy limiter has now +shifted from VGPRs (**6.2.5**) to LDS (**6.2.7**). + +We note that although we see around the same scheduler/stall rates +(with our LDS limiter), our wave occupancy (**2.1.15**) is significantly +lower (:math:`\sim12\%`)! This is important to remember: the occupancy +limiter metrics in the resource allocation section tell you what the +limiter was, but *not* how much the occupancy was limited. These metrics +should always be analyzed in concert with the wavefront occupancy +metric! + +.. _sgpr-occupancy: + +SGPR limited +------------ + +Finally, we modify our kernel once more to make it limited by +:ref:`SGPRs `: + +.. 
code-block:: cpp + + constexpr int sgprlim = 1; + __launch_bounds__(1024, 8) + __global__ void sgprbound(int N, double* ptr) { + double intermediates[sgprlim]; + for (int i = 0 ; i < sgprlim; ++i) intermediates[i] = i; + double x = ptr[0]; + #pragma unroll 1 + for (int i = 0; i < 100; ++i) { + x += sin(pow(intermediates[(i - 1) % sgprlim], intermediates[i % sgprlim])); + intermediates[i % sgprlim] = x; + } + if (x == N) ptr[0] = x; + } + +The major changes here are to: - make as much as possible provably +uniform across the wave (notice the lack of ``threadIdx.x`` in the +``intermediates`` initialization and elsewhere), - addition of +``__launch_bounds__(1024, 8)``, which reduces our maximum VGPRs to 64 +(such that 8 waves can fit per SIMD), but causes some register spills +(i.e., :ref:`scratch ` usage), and - lower the ``bound`` (here we +use ``sgprlim``) of the array to reduce VGPR/Scratch usage. + +This results in the following assembly metadata for this kernel: + +.. code-block:: asm + + .size _Z9sgprboundiPd, .Lfunc_end3-_Z9sgprboundiPd + ; -- End function + .section .AMDGPU.csdata + ; Kernel info: + ; codeLenInByte = 4872 + ; NumSgprs: 76 + ; NumVgprs: 64 + ; NumAgprs: 0 + ; TotalNumVgprs: 64 + ; ScratchSize: 60 + ; <...> + ; AccumOffset: 64 + ; Occupancy: 8 + +Analyzing this workload yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 7.1.8 7.1.9 --dispatch 5 + <...> + -------------------------------------------------------------------------------- + 0. 
Top Stat + ╒════╤═════════════════════════╤═════════╤══════════════╤══════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═════════════════════════╪═════════╪══════════════╪══════════════╪══════════════╪════════╡ + │ 0 │ sgprbound(int, double*) │ 1.00 │ 782069812.00 │ 782069812.00 │ 782069812.00 │ 100.00 │ + ╘════╧═════════════════════════╧═════════╧══════════════╧══════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 2. System Speed-of-Light + 2.1 Speed-of-Light + ╒═════════╤═════════════════════╤═════════╤════════════╤═════════╤═══════════════╕ + │ Index │ Metric │ Avg │ Unit │ Peak │ Pct of Peak │ + ╞═════════╪═════════════════════╪═════════╪════════════╪═════════╪═══════════════╡ + │ 2.1.15 │ Wavefront Occupancy │ 3291.76 │ Wavefronts │ 3328.00 │ 98.91 │ + ╘═════════╧═════════════════════╧═════════╧════════════╧═════════╧═══════════════╛ + + + -------------------------------------------------------------------------------- + 6. 
Workgroup Manager (SPI) + 6.2 Workgroup Manager - Resource Allocation + ╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡ + │ 6.2.0 │ Not-scheduled Rate (Workgroup Manager) │ 7.72 │ 7.72 │ 7.72 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.1 │ Not-scheduled Rate (Scheduler-Pipe) │ 15.17 │ 15.17 │ 15.17 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.2 │ Scheduler-Pipe Stall Rate │ 7.38 │ 7.38 │ 7.38 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.3 │ Scratch Stall Rate │ 39.76 │ 39.76 │ 39.76 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.4 │ Insufficient SIMD Waveslots │ 26.32 │ 26.32 │ 26.32 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.5 │ Insufficient SIMD VGPRs │ 26.32 │ 26.32 │ 26.32 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.6 │ Insufficient SIMD SGPRs │ 25.52 │ 25.52 │ 25.52 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.7 │ Insufficient CU LDS │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.8 │ Insufficient CU Barriers │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.9 │ Reached CU Workgroup Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ + ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤ + │ 6.2.10 │ Reached CU Wavefront Limit │ 0.00 │ 0.00 │ 0.00 │ Pct │ + 
╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛ + + + -------------------------------------------------------------------------------- + 7. Wavefront + 7.1 Wavefront Launch Stats + ╒═════════╤════════════════════╤═══════╤═══════╤═══════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════════════╪═══════╪═══════╪═══════╪════════════════╡ + │ 7.1.5 │ VGPRs │ 64.00 │ 64.00 │ 64.00 │ Registers │ + ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ + │ 7.1.6 │ AGPRs │ 0.00 │ 0.00 │ 0.00 │ Registers │ + ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ + │ 7.1.7 │ SGPRs │ 80.00 │ 80.00 │ 80.00 │ Registers │ + ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ + │ 7.1.8 │ LDS Allocation │ 0.00 │ 0.00 │ 0.00 │ Bytes │ + ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤ + │ 7.1.9 │ Scratch Allocation │ 60.00 │ 60.00 │ 60.00 │ Bytes/workitem │ + ╘═════════╧════════════════════╧═══════╧═══════╧═══════╧════════════════╛ + +Here we see that our wavefront launch stats (**7.1**) have changed to +reflect the metadata seen in the ``--save-temps`` output. Of particular +interest, we see: + +* The SGPR allocation (**7.1.7**) is 80 registers, slightly more than the 76 + requested by the compiler due to allocation granularity, and + +* We have a :ref:`"scratch" `, that is, private memory, + allocation of 60 bytes per work-item. + +Analyzing the resource allocation block (**6.2**) we now see that for the +first time, the "Not-scheduled Rate (Workgroup Manager)" metric (**6.2.0**) +has become non-zero. This is because the workgroup manager is +responsible for management of scratch, which we see also contributes to +our occupancy limiters in the "Scratch Stall Rate" (**6.2.3**). 
Note that +the sum of the workgroup manager not-scheduled rate and the +scheduler-pipe non-scheduled rate is still :math:`\sim25\%`, as in our +previous examples. + +Next, we see that the scheduler-pipe stall rate (**6.2.2**), that is, how often +we could not schedule a workgroup to a CU, was only about +:math:`\sim8\%`. This hints that perhaps, our kernel is not +*particularly* occupancy limited by resources. Indeed, checking the +wave occupancy metric (**2.1.15**) shows that this kernel is reaching nearly +99% occupancy. + +Finally, we inspect the occupancy limiter metrics and see a roughly even +split between :ref:`waveslots ` (**6.2.4**), :ref:`VGPRs ` +(**6.2.5**), and :ref:`SGPRs ` (**6.2.6**) along with the scratch stalls +(**6.2.3**) previously mentioned. + +This is yet another reminder to view occupancy holistically. While these +metrics tell you why a workgroup cannot be scheduled, they do *not* tell +you what your occupancy was (consult wavefront occupancy) *nor* whether +increasing occupancy will be beneficial to performance. diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/valu-arithmetic-instruction-mix.rst b/projects/rocprofiler-compute/docs/tutorial/includes/valu-arithmetic-instruction-mix.rst new file mode 100644 index 0000000000..dcdb46ac4d --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/includes/valu-arithmetic-instruction-mix.rst @@ -0,0 +1,113 @@ +.. _valu-arith-instruction-mix-ex: + +VALU arithmetic instruction mix +=============================== + + For this example, consider the + :dev-sample:`instruction mix sample ` distributed as a part + of ROCm Compute Profiler. + +.. note:: + + The examples in the section are expected to work on all CDNA™ accelerators. + However, the actual experiment results in this section were collected on an + :ref:`MI2XX ` accelerator. + +.. 
_valu-experiment-design: + +Design note +----------- + +This code uses a number of inline assembly instructions to cleanly +identify the types of instructions being issued, as well as to avoid +optimization / dead-code elimination by the compiler. While inline +assembly is inherently not portable, this example is expected to work on +all GCN™ GPUs and CDNA accelerators. + +We reproduce a sample of the kernel as follows: + +.. code-block:: cpp + + // fp32: add, mul, transcendental and fma + float f1, f2; + asm volatile( + "v_add_f32_e32 %0, %1, %0\n" + "v_mul_f32_e32 %0, %1, %0\n" + "v_sqrt_f32 %0, %1\n" + "v_fma_f32 %0, %1, %0, %1\n" + : "=v"(f1) + : "v"(f2)); + +These instructions correspond to: + +* A 32-bit floating point addition, + +* a 32-bit floating point multiplication, + +* a 32-bit floating point square-root transcendental operation, and + +* a 32-bit floating point fused multiply-add operation. + +For more detail, refer to the `CDNA2 ISA +Guide `__. + +Instruction mix +^^^^^^^^^^^^^^^ + + This example was compiled and run on an MI250 accelerator using ROCm + v5.6.0, and ROCm Compute Profiler v2.0.0. + +.. code-block:: shell + + $ hipcc -O3 instmix.hip -o instmix + +Generate the profile for this example using the following command. + +.. code-block:: shell + + $ rocprof-compute profile -n instmix --no-roof -- ./instmix + +Analyze the instruction mix section. + +.. code-block:: shell + + $ rocprof-compute analyze -p workloads/instmix/mi200/ -b 10.2 + <...> + 10. 
Compute Units - Instruction Mix + 10.2 VALU Arithmetic Instr Mix + ╒═════════╤════════════╤═════════╤════════════════╕ + │ Index │ Metric │ Count │ Unit │ + ╞═════════╪════════════╪═════════╪════════════════╡ + │ 10.2.0 │ INT32 │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.1 │ INT64 │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.2 │ F16-ADD │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.3 │ F16-MUL │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.4 │ F16-FMA │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.5 │ F16-Trans │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.6 │ F32-ADD │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.7 │ F32-MUL │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.8 │ F32-FMA │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.9 │ F32-Trans │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.10 │ F64-ADD │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.11 │ F64-MUL │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.12 │ F64-FMA │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.13 │ F64-Trans │ 1.00 │ Instr per wave │ + ├─────────┼────────────┼─────────┼────────────────┤ + │ 10.2.14 │ Conversion │ 1.00 │ Instr per wave │ + ╘═════════╧════════════╧═════════╧════════════════╛ + +This shows that we have exactly one of each type of VALU arithmetic instruction +by construction. 
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/vector-memory-operation-counting.rst b/projects/rocprofiler-compute/docs/tutorial/includes/vector-memory-operation-counting.rst new file mode 100644 index 0000000000..4cfb875f40 --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/includes/vector-memory-operation-counting.rst @@ -0,0 +1,698 @@ +.. _vmem-example: + +Vector memory operation counting +================================ + +.. _flat-memory-ex: + +Global / Generic (FLAT) +----------------------- + +For this example, consider the +:dev-sample:`vector memory sample ` distributed as a part of +ROCm Compute Profiler. This code launches many different versions of a simple +read/write/atomic-only kernels targeting various address spaces. For example, +below is our simple ``global_write`` kernel: + +.. code-block:: cpp + + // write to a global pointer + __global__ void global_write(int* ptr, int zero) { + ptr[threadIdx.x] = zero; + } + +.. note:: + + This example was compiled and run on an MI250 accelerator using ROCm + v5.6.0, and ROCm Compute Profiler v2.0.0. + +.. code-block:: shell-session + + $ hipcc -O3 --save-temps vmem.hip -o vmem + +We have also chosen to include the ``--save-temps`` flag to save the +compiler temporary files, such as the generated CDNA assembly code, for +inspection. + +Finally, we generate our ``rocprof-compute profile`` as follows. + +.. code-block:: shell-session + + $ rocprof-compute profile -n vmem --no-roof -- ./vmem + +.. _flat-experiment-design: + +Design note +^^^^^^^^^^^ + +This section explains some of the more peculiar lines of code in the +example, for example, the use of compiler built-ins and explicit address space +casting, and so forth. + +.. 
code-block:: cpp + + // write to a generic pointer + typedef int __attribute__((address_space(0)))* generic_ptr; + + __attribute__((noinline)) __device__ void generic_store(generic_ptr ptr, int zero) { *ptr = zero; } + + __global__ void generic_write(int* ptr, int zero, int filter) { + __shared__ int lds[1024]; + int* generic = (threadIdx.x < filter) ? &ptr[threadIdx.x] : &lds[threadIdx.x]; + generic_store((generic_ptr)generic, zero); + } + +One of the aims of this example is to demonstrate the use of the +:llvm-docs:`"generic" FLAT ` address space. This +address space is typically used when the compiler cannot statically prove where +the backing memory is located. + +To try to *force* the compiler to use this address space, we applied +``__attribute__((noinline))`` to the ``generic_store`` function to have the +compiler treat it as a function call (that is, on the other side of which, the +address space may not be known). However, in a trivial example such as this, the +compiler may choose to specialize the ``generic_store`` function to the two +address spaces that might provably be used from our translation unit, that is, +:ref:`"local" (or, LDS) ` and :ref:`"global" `. +Hence, we forcibly cast the address space to +:ref:`"generic" (or, FLAT) ` to avoid this compiler +optimization. + +.. warning:: + + While convenient for this example, this sort of explicit address space + casting can lead to strange compilation errors, and in the worst case, + incorrect results. As a result, use is discouraged in production code. + +For more details on address spaces, refer to +:ref:`memory-spaces`. + +Global write +^^^^^^^^^^^^ + +First, we demonstrate our simple ``global_write`` kernel: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 1 -b 10.3 15.1.4 15.1.5 15.1.6 15.1.7 15.1.8 15.1.9 15.1.10 15.1.11 -n per_kernel + <...> + -------------------------------------------------------------------------------- + 0. 
Top Stat + ╒════╤═════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪═════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ global_write(int*, int) [clone .kd] │ 1.00 │ 2400.00 │ 2400.00 │ 2400.00 │ 100.00 │ + ╘════╧═════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 10. Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per 
kernel │ + ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 15. Address Processing Unit and Data Return Path (TA/TD) + 15.1 Address Processing Unit + ╒═════════╤═════════════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 15.1.4 │ Total Instructions │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.5 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.6 │ Global/Generic Read Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.7 │ Global/Generic Write Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.8 │ Global/Generic Atomic Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.9 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.10 │ Spill/Stack Read Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 15.1.11 │ Spill/Stack Write Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧═════════════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + +Here, we have presented both the information in the VMEM Instruction Mix +table (**10.3**) and the Address Processing Unit (**15.1**). 
We note that this +data is expected to be identical, and hence we omit table 15.1 in our +subsequent examples. + +In addition, as expected, we see a single Global/Generic Write +instruction (**10.3.2**, **15.1.7**). Inspecting the generated assembly, we get: + +.. code-block:: asm + + .protected _Z12global_writePii ; -- Begin function _Z12global_writePii + .globl _Z12global_writePii + .p2align 8 + .type _Z12global_writePii,@function + _Z12global_writePii: ; @_Z12global_writePii + ; %bb.0: + s_load_dword s2, s[4:5], 0x8 + s_load_dwordx2 s[0:1], s[4:5], 0x0 + v_lshlrev_b32_e32 v0, 2, v0 + s_waitcnt lgkmcnt(0) + v_mov_b32_e32 v1, s2 + global_store_dword v0, v1, s[0:1] + s_endpgm + .section .rodata,#alloc + .p2align 6, 0x0 + .amdhsa_kernel _Z12global_writePii + +Notice that this corresponds to an instance of a ``global_store_dword`` +operation. + +.. note:: + + The assembly in these experiments was generated for an + :ref:`MI2XX ` accelerator using ROCm 5.6.0, and may change + depending on ROCm versions and the targeted hardware architecture. + +.. _generic-write-ex: + +Generic write to LDS +^^^^^^^^^^^^^^^^^^^^ + +Next, we examine a generic write. As discussed +:ref:`previously <flat-experiment-design>`, our ``generic_write`` kernel uses an +address space cast to *force* the compiler to choose our desired address +space, regardless of other optimizations that may be possible. + +Also note that the ``filter`` parameter passed in as a kernel argument (see +:dev-sample:`example ` and +:ref:`design note <flat-experiment-design>`) is set to zero on the host, such +that we always write to the :doc:`local ` (LDS) +memory allocation ``lds``. + +Examining this kernel in the VMEM Instruction Mix table yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 2 -b 10.3 -n per_kernel + <...> + 0. 
Top Stat + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ generic_write(int*, int, int) [clone .kd │ 1.00 │ 2880.00 │ 2880.00 │ 2880.00 │ 100.00 │ + │ │ ] │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 10. Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack 
Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + +As expected, we see a single generic write (**10.3.2**). In the assembly +generated for this kernel (in particular, we care about the +``generic_store`` function), we see that this corresponds to a +``flat_store_dword`` instruction: + +.. code-block:: asm + + .type _Z13generic_storePii,@function + _Z13generic_storePii: ; @_Z13generic_storePii + ; %bb.0: + s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + flat_store_dword v[0:1], v2 + s_waitcnt vmcnt(0) lgkmcnt(0) + s_setpc_b64 s[30:31] + .Lfunc_end0: + +In addition, we note that we can observe the destination of this request +by looking at the LDS Instructions metric (**12.2.0**) -- which indicates one LDS +access. + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 2 -b 12.2.0 -n per_kernel + <...> + 12. Local Data Share (LDS) + 12.2 LDS Stats + ╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 12.2.0 │ LDS Instrs │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛ + +.. note:: + + Exercise for the reader: if this access had been targeted at global memory + (for instance, by changing the value of ``filter``), where should we look for the + memory traffic? Hint: see the :ref:`generic read <generic-read-ex>` example. + +.. _global-read-ex: + +Global read +^^^^^^^^^^^ + +Next, we examine a simple global read operation: + +.. code-block:: cpp + + __global__ void global_read(int* ptr, int zero) { + int x = ptr[threadIdx.x]; + if (x != zero) { + ptr[threadIdx.x] = x + 1; + } + } + +Here we observe a now familiar pattern: + +- Read a value in from global memory. 
+ +- Have a write hidden behind a conditional that is impossible for + the compiler to statically eliminate, but is identically false. In this + case, our ``main()`` function initializes the data in ``ptr`` to zero. + +Running ROCm Compute Profiler on this kernel yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 3 -b 10.3 -n per_kernel + <...> + 0. Top Stat + ╒════╤════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ global_read(int*, int) [clone .kd] │ 1.00 │ 4480.00 │ 4480.00 │ 4480.00 │ 100.00 │ + ╘════╧════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 10. Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + 
├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + +Here we see a single global/generic instruction (**10.3.0**) which, as +expected, is a read (**10.3.1**). + +.. _generic-read-ex: + +Generic read from global memory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For our generic read example, we choose to change our target for the +generic read to be global memory: + +.. code-block:: cpp + + __global__ void generic_read(int* ptr, int zero, int filter) { + __shared__ int lds[1024]; + if (static_cast<int>(filter - 1) == zero) { + lds[threadIdx.x] = 0; // initialize to zero to avoid conditional, but hide behind _another_ conditional + } + int* generic; + if (static_cast<int>(threadIdx.x) > filter - 1) { + generic = &ptr[threadIdx.x]; + } else { + generic = &lds[threadIdx.x]; + abort(); + } + int x = generic_load((generic_ptr)generic); + if (x != zero) { + ptr[threadIdx.x] = x + 1; + } + } + +In addition to our usual ``if (condition_that_wont_happen)`` guard +around the write operation, there is an additional conditional around +the initialization of the ``lds`` buffer. We note that it’s typically +required to write to this buffer to prevent the compiler from +eliminating the local memory branch entirely due to undefined behavior +(use of an uninitialized value). However, to report *only* our global +memory read, we again hide this initialization behind an identically +false conditional (both ``zero`` and ``filter`` are set to zero in the +kernel launch). 
Note that this is a *different* conditional from our +pointer assignment (to avoid combination of the two). + +Running ROCm Compute Profiler on this kernel reports: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 4 -b 10.3 12.2.0 16.3.10 -n per_kernel + <...> + 0. Top Stat + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ generic_read(int*, int, int) [clone .kd] │ 1.00 │ 2240.00 │ 2240.00 │ 2240.00 │ 100.00 │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 10. Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 
0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 12. Local Data Share (LDS) + 12.2 LDS Stats + ╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 12.2.0 │ LDS Instrs │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 16. Vector L1 Data Cache + 16.3 L1D Cache Accesses + ╒═════════╤════════════╤═══════╤═══════╤═══════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════╪═══════╪═══════╪═══════╪════════════════╡ + │ 16.3.10 │ L1-L2 Read │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ + ╘═════════╧════════════╧═══════╧═══════╧═══════╧════════════════╛ + +Here we observe: + +- A single global/generic read operation (**10.3.1**), which + +- Is not an LDS instruction (**12.2.0**), as seen in the + :ref:`generic write <generic-write-ex>` example, but is instead + +- An L1-L2 read operation (**16.3.10**) + +That is, we have successfully targeted our generic read at global +memory. Inspecting the assembly shows this corresponds to a +``flat_load_dword`` instruction. + +.. _global-atomic-ex: + +Global atomic +^^^^^^^^^^^^^ + +Our global atomic kernel simply atomically adds a (non-compile-time) zero value +to a pointer. + +.. 
code-block:: cpp + + __global__ void global_atomic(int* ptr, int zero) { + atomicAdd(ptr, zero); + } + + +Running ROCm Compute Profiler on this kernel yields: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 5 -b 10.3 16.3.12 -n per_kernel + <...> + 0. Top Stat + ╒════╤══════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪══════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ global_atomic(int*, int) [clone .kd] │ 1.00 │ 4640.00 │ 4640.00 │ 4640.00 │ 100.00 │ + ╘════╧══════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + -------------------------------------------------------------------------------- + 10. Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel 
│ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 16. Vector L1 Data Cache + 16.3 L1D Cache Accesses + ╒═════════╤══════════════╤═══════╤═══════╤═══════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪══════════════╪═══════╪═══════╪═══════╪════════════════╡ + │ 16.3.12 │ L1-L2 Atomic │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ + ╘═════════╧══════════════╧═══════╧═══════╧═══════╧════════════════╛ + +Here we see a single global/generic atomic instruction (**10.3.3**), which +corresponds to an L1-L2 atomic request (**16.3.12**). + +.. _generic-mixed-atomic-ex: + +Generic, mixed atomic +^^^^^^^^^^^^^^^^^^^^^ + +In our final global/generic example, we look at a case where our generic +operation targets both LDS and global memory: + +.. code-block:: cpp + + __global__ void generic_atomic(int* ptr, int filter, int zero) { + __shared__ int lds[1024]; + int* generic = (threadIdx.x % 2 == filter) ? &ptr[threadIdx.x] : &lds[threadIdx.x]; + generic_atomic((generic_ptr)generic, zero); + } + +This assigns every other work-item to atomically update global memory or +local memory. + +Running this kernel through ROCm Compute Profiler shows: + +.. code-block:: shell-session + + $ rocprof-compute analyze -p workloads/vmem/mi200/ --dispatch 6 -b 10.3 12.2.0 16.3.12 -n per_kernel + <...> + 0. 
Top Stat + ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕ + │ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ + ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡ + │ 0 │ generic_atomic(int*, int, int) [clone .k │ 1.00 │ 3360.00 │ 3360.00 │ 3360.00 │ 100.00 │ + │ │ d] │ │ │ │ │ │ + ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛ + + + 10. Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + 
╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 12. Local Data Share (LDS) + 12.2 LDS Stats + ╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 12.2.0 │ LDS Instrs │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 16. Vector L1 Data Cache + 16.3 L1D Cache Accesses + ╒═════════╤══════════════╤═══════╤═══════╤═══════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪══════════════╪═══════╪═══════╪═══════╪════════════════╡ + │ 16.3.12 │ L1-L2 Atomic │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ + ╘═════════╧══════════════╧═══════╧═══════╧═══════╧════════════════╛ + +That is, we see: + +- A single generic atomic instruction (**10.3.3**) that maps to both + +- An LDS instruction (**12.2.0**), and + +- An L1-L2 atomic request (**16.3.12**) + +We have demonstrated the ability of the generic address space to +*dynamically* target different backing memory. + +.. _spill-scratch: + +Spill/Scratch (BUFFER) +---------------------- + +Next we examine the use of "Spill/Scratch" memory. On current CDNA +accelerators such as the :ref:`MI2XX `, this is implemented using +the :ref:`private ` memory space, which maps to +:llvm-docs:`"scratch" memory ` in AMDGPU hardware +terminology. This type of memory can be accessed via different instructions +depending on the specific architecture targeted. However, current CDNA +accelerators such as the :ref:`MI2XX ` use so-called ``buffer`` +instructions to access private memory in a simple (and typically coalesced) +manner. See +:mi200-isa-pdf:`Sec. 
9.1, "Vector Memory Buffer Instructions" of the CDNA2 ISA guide <>` +for further reading on this instruction type. + +We develop a `simple +kernel `__ +that uses stack memory: + +.. code-block:: cpp + + #include <hip/hip_runtime.h> + __global__ void knl(int* out, int filter) { + int x[1024]; + x[filter] = 0; + if (threadIdx.x < filter) + out[threadIdx.x] = x[threadIdx.x]; + } + +Our strategy here is to: + +* Create a large stack buffer (that cannot reasonably fit into registers), + write to a compile-time unknown location on the stack, and then + +* Behind the typical compile-time unknown ``if(condition_that_wont_happen)`` + +* Read from a different, compile-time unknown, location on the stack and write + to global memory to prevent the compiler from optimizing it out. + +This example was compiled and run on an MI250 accelerator using ROCm v5.6.0, and +ROCm Compute Profiler v2.0.0. + +.. code-block:: shell-session + + $ hipcc -O3 stack.hip -o stack + +And profiled using ROCm Compute Profiler: + +.. code-block:: shell-session + + $ rocprof-compute profile -n stack --no-roof -- ./stack + <...> + $ rocprof-compute analyze -p workloads/stack/mi200/ -b 10.3 16.3.11 -n per_kernel + <...> + 10. 
Compute Units - Instruction Mix + 10.3 VMEM Instr Mix + ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡ + │ 10.3.0 │ Global/Generic Instr │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.1 │ Global/Generic Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.2 │ Global/Generic Write │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.3 │ Global/Generic Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.4 │ Spill/Stack Instr │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.5 │ Spill/Stack Read │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.6 │ Spill/Stack Write │ 1.00 │ 1.00 │ 1.00 │ Instr per kernel │ + ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤ + │ 10.3.7 │ Spill/Stack Atomic │ 0.00 │ 0.00 │ 0.00 │ Instr per kernel │ + ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛ + + + -------------------------------------------------------------------------------- + 16. 
Vector L1 Data Cache + 16.3 L1D Cache Accesses + ╒═════════╤═════════════╤═══════╤═══════╤═══════╤════════════════╕ + │ Index │ Metric │ Avg │ Min │ Max │ Unit │ + ╞═════════╪═════════════╪═══════╪═══════╪═══════╪════════════════╡ + │ 16.3.11 │ L1-L2 Write │ 1.00 │ 1.00 │ 1.00 │ Req per kernel │ + ╘═════════╧═════════════╧═══════╧═══════╧═══════╧════════════════╛ + +Here we see a single write to the stack (**10.3.6**), which corresponds to +an L1-L2 write request (**16.3.11**), that is, the stack is backed by global +memory and travels through the same memory hierarchy. diff --git a/projects/rocprofiler-compute/docs/tutorial/learning-resources.rst b/projects/rocprofiler-compute/docs/tutorial/learning-resources.rst new file mode 100644 index 0000000000..96ccba3b0c --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/learning-resources.rst @@ -0,0 +1,23 @@ +.. meta:: + :description: ROCm Compute Profiler external training resources + :keywords: ROCm Compute Profiler, examples, tutorials, videos, lesson, lessons, how + +****************** +Learning resources +****************** + +This section provides a curated list of external resources and third-party +content to support learning the ROCm Compute Profiler. Some information in +these materials may be outdated. + +ROCm Compute Profiler was previously known as Omniperf. Some of the following +resources use the earlier name. + +Introduction to ROCm Compute Profiler + :fab:`youtube` `AMD profiling workshop (Pawsey Supercomputing Research Centre) `_ + +ROCm Compute Profiler example exercises + ``__ + +AMD Instinct™ tuning guides + :doc:`rocm:how-to/rocm-for-ai/inference-optimization/workload` diff --git a/projects/rocprofiler-compute/docs/tutorial/profiling-by-example.rst b/projects/rocprofiler-compute/docs/tutorial/profiling-by-example.rst new file mode 100644 index 0000000000..faa00b5ec3 --- /dev/null +++ b/projects/rocprofiler-compute/docs/tutorial/profiling-by-example.rst @@ -0,0 +1,23 @@ +.. 
meta:: + :description: ROCm Compute Profiler: Profiling by example + :keywords: ROCm Compute Profiler, ROCm, profiler, tool, Instinct, accelerator, AMD + +******************** +Profiling by example +******************** + +The following examples refer to sample :doc:`HIP ` code located in +:fab:`github` :dev-sample:`ROCm/rocprofiler-compute/blob/amd-mainline/sample <>` +and distributed as part of ROCm Compute Profiler. + +.. include:: ./includes/valu-arithmetic-instruction-mix.rst + +.. include:: ./includes/infinity-fabric-transactions.rst + +.. include:: ./includes/vector-memory-operation-counting.rst + +.. include:: ./includes/instructions-per-cycle-and-utilizations.rst + +.. include:: ./includes/lds-examples.rst + +.. include:: ./includes/occupancy-limiters-example.rst diff --git a/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst b/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst new file mode 100644 index 0000000000..ce96499f46 --- /dev/null +++ b/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst @@ -0,0 +1,128 @@ +.. meta:: + :description: What is ROCm Compute Profiler? + :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD + +****************************** +What is ROCm Compute Profiler? +****************************** + +ROCm Compute Profiler is a kernel-level profiling tool for machine learning and high +performance computing (HPC) workloads running on AMD Instinct™ accelerators. + +AMD Instinct MI-series accelerators are data center-class GPUs designed for +compute and have some graphics capabilities disabled or removed. +ROCm Compute Profiler primarily targets use with +:doc:`accelerators in the MI300, MI200, and MI100 families `. +Development is in progress to support Radeon™ (RDNA) GPUs. + +ROCm Compute Profiler is built on top of :doc:`ROCProfiler ` to +monitor hardware performance counters. + +.. 
_high-level-design: + +High-level design +================= + +The architecture of ROCm Compute Profiler consists of three major components shown in the +following diagram. + +Core ROCm Compute Profiler +-------------------------- + +Acquires raw performance counters via application replay using ``rocprof``. +Counters are stored in a comma-separated-values format for further +:doc:`analysis `. It runs a set of accelerator-specific +micro-benchmarks to acquire hierarchical roofline data. The roofline model is +not available on accelerators pre-MI200. + +Grafana server for ROCm Compute Profiler +---------------------------------------- + +* **Grafana database import**: All raw performance counters are imported into + a :ref:`backend MongoDB database ` to support + analysis and visualization in the Grafana GUI. Compatibility with + previously generated data using older ROCm Compute Profiler versions is not guaranteed. + +* **Grafana analysis dashboard GUI**: The + :doc:`Grafana dashboard ` retrieves the raw + counter information from the backend database. It displays the relevant + performance metrics and visualizations. + +ROCm Compute Profiler standalone GUI analyzer +--------------------------------------------- + +ROCm Compute Profiler provides a :doc:`standalone GUI ` to +enable basic performance analysis without the need to import data into a +database instance. Find setup instructions in :doc:`install/grafana-setup`. + +.. image:: data/install/omniperf_server_vs_client_install.png + :align: center + :alt: Architectural design of ROCm Compute Profiler + :width: 800 + +Features +======== + +ROCm Compute Profiler offers comprehensive profiling based on all available hardware counters +for the target accelerator. It delivers advanced performance analysis features, +such as system Speed-of-Light (SOL) and hardware block-level SOL evaluations. 
+Additionally, ROCm Compute Profiler provides in-depth memory chart analysis, roofline +analysis, baseline comparisons, and more, ensuring a thorough understanding of +system performance. + +ROCm Compute Profiler supports analysis through either the :doc:`command line ` or a +:doc:`GUI `. The following list describes ROCm Compute Profiler's features at a +high level. + +* :doc:`Support for AMD Instinct MI300, MI200, and MI100 accelerators ` + +* :doc:`Standalone GUI analyzer ` + +* :doc:`GUI analyzer via Grafana and MongoDB ` + + * :ref:`System Info panel ` + + * :ref:`Kernel Statistic panel ` + + * :ref:`System Speed-of-Light panel ` + + * :ref:`Memory Chart Analysis panel ` + + * :ref:`Roofline Analysis panel ` (Supported on MI200 and above architectures only) + + * :ref:`Command Processor (CP) panel ` + + * :ref:`Workgroup Manager (SPI) panel ` + + * :ref:`Wavefront Launch panel ` + + * :ref:`Compute Unit - Instruction Mix panel ` + + * :ref:`Compute Unit - Pipeline panel ` + + * :ref:`Local Data Share (LDS) panel ` + + * :ref:`Instruction Cache panel ` + + * :ref:`Scalar L1D Cache panel ` + + * :ref:`L1 Address Processing Unit or Texture Addresser (TA) `; + and :ref:`L1 Backend Data Processing Unit or Texture Data (TD) ` panels + + * :ref:`Vector L1D Cache panel ` + + * :ref:`L2 Cache panel ` + + * :ref:`L2 Cache (per-channel) panel ` + +* :ref:`Filtering ` to reduce profiling time + + * Filtering by dispatch + + * Filtering by kernel + + * Filtering by GPU ID + +* :ref:`Baseline comparisons ` + +* :ref:`Multiple normalizations ` diff --git a/projects/rocprofiler-compute/grafana/.dockerignore b/projects/rocprofiler-compute/grafana/.dockerignore new file mode 100644 index 0000000000..21e7f702ff --- /dev/null +++ b/projects/rocprofiler-compute/grafana/.dockerignore @@ -0,0 +1 @@ +/dashboards diff --git a/projects/rocprofiler-compute/grafana/Dockerfile b/projects/rocprofiler-compute/grafana/Dockerfile new file mode 100644 index 0000000000..19ee5efbd0 --- /dev/null +++ 
b/projects/rocprofiler-compute/grafana/Dockerfile @@ -0,0 +1,74 @@ +# ----------------------------------------------------------------------- +# NOTE: +# Dependencies are not included as part of ROCm Compute Profiler. +# It's the user's responsibility to accept any licensing implications +# before building the project +# ----------------------------------------------------------------------- + +FROM ubuntu:22.04 +WORKDIR /app + +USER root + +ENV DEBIAN_FRONTEND noninteractive +ENV TZ "US/Chicago" +ENV NVM_DIR /usr/local/nvm +ENV NODE_VERSION 20.12.2 + +COPY plugins/rocprofiler-compute_plugin /var/lib/grafana/plugins/rocprofiler-compute_plugin + +# Install Grafana and MongoDB Community Edition +# Note: Grafana install is pinned to 10.4.3 +RUN apt-get update && \ + apt-get install -y adduser libfontconfig1 musl wget && \ + wget -q https://dl.grafana.com/enterprise/release/grafana-enterprise_10.4.3_amd64.deb && \ + dpkg -i grafana-enterprise_10.4.3_amd64.deb && \ + apt-get install -y gnupg curl && \ + curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | gpg -o /usr/share/keyrings/mongodb-server-7.0.gpg --dearmor && \ + echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-7.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/7.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-7.0.list && \ + apt-get update && \ + apt-get install -y mongodb-org + +RUN mkdir /usr/local/nvm && \ + curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash && \ + . 
$NVM_DIR/nvm.sh && \ + nvm install $NODE_VERSION && \ + nvm alias default $NODE_VERSION && \ + nvm use default + +# NODE_PATH and PATH must share the versions/node/ prefix of the nvm-installed Node release +ENV NODE_PATH $NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules +ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH + +RUN npm --version && \ + node --version + +# Install Grafana plugins +RUN apt-get install -y tzdata systemd apt-utils npm vim net-tools && \ + /usr/sbin/grafana-cli plugins install michaeldmoore-multistat-panel && \ + /usr/sbin/grafana-cli plugins install ae3e-plotly-panel && \ + /usr/sbin/grafana-cli plugins install natel-plotly-panel && \ + /usr/sbin/grafana-cli plugins install grafana-image-renderer && \ + /usr/sbin/grafana-cli plugins install aceiot-svg-panel && \ + chown root:grafana /etc/grafana && \ + cd /var/lib/grafana/plugins/rocprofiler-compute_plugin && \ + npm install && \ + npm run build && \ + apt-get autoremove -y && \ + apt-get autoclean -y && \ + sed -i "s/ bindIp.*/ bindIp: 0.0.0.0/" /etc/mongod.conf && \ + mkdir -p /var/lib/grafana && \ + touch /var/lib/grafana/grafana.lib && \ + chown grafana:grafana /var/lib/grafana/grafana.lib + +# Overwrite grafana ini file +COPY grafana.ini /etc/grafana + +# Switch Grafana port to 4000 +RUN sed -i "s/^;http_port = 3000/http_port = 4000/" /etc/grafana/grafana.ini && \ + sed -i "s/^http_port = 3000/http_port = 4000/" /usr/share/grafana/conf/defaults.ini + +# Starts mongo and grafana-server at startup +COPY docker-entrypoint.sh /docker-entrypoint.sh + +ENTRYPOINT [ "/docker-entrypoint.sh" ] diff --git a/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.3_pub.json b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.3_pub.json new file mode 100644 index 0000000000..aecf5d7ccf --- /dev/null +++ b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.3_pub.json @@ -0,0 +1,13325 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + 
"iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "iteration": 1667231289239, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 217, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 23, + "w": 13, + "x": 0, + "y": 1 + }, + "id": 159, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.sysinfo.aggregate([\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": 
\"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.sysinfo.aggregate([\n {\"$match\": {\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(System Info)\"}}\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n 
\"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "System Info", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { 
+ "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true + }, + "indexByName": {}, + "renameByName": { + "Value 1": "Current", + "Value 2": "Baseline" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Info", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 108, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "decimals": 0, + "links": [], + "mappings": [ + { + "options": { + "match": "false", + "result": { + "index": 0 + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text" + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Percent of Peak - PoP" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.width", + "value": 252 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit 1" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 137 + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 125 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 161 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 15, + "x": 0, + "y": 2 + }, + "id": 110, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { 
\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n },\n\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": 
\"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", 
\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n 
\"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs,\n \"Unit\": \"CUs\",\n \"peak\": $numCU,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n 
\"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of 
Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n },\n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n 
\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n 
}}\n\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": 
[\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2, 4] }] }\n },\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", 
\"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { 
\"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n 
{\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs2,\n \"Unit\": \"CUs\",\n \"peak\": $numCU2,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs2] }, $numCU2]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n 
\"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk2, $numCU2, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": 
[{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n }, \n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU2, $numCU2] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n 
{\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU2, $numCU2] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC2, { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]\n }}\n\n ]);", + "type": "table" + } + ], + "title": "Speed of Light", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Metric 1": 0, + "Metric 2": 7, + "Percent 
of Peak - PoP 1": 5, + "Percent of Peak - PoP 2": 6, + "Unit 1": 8, + "Unit 2": 9, + "Value 1": 1, + "Value 2": 2, + "peak 1": 3, + "peak 2": 4 + }, + "renameByName": { + "Percent of Peak - PoP": "Pct-of-Peak", + "Percent of Peak - PoP 1": "Pct-of-Peak (Current)", + "Percent of Peak - PoP 2": "Pct-of-Peak (Baseline)", + "Unit": "", + "Value": "Avg", + "Value 1": "Avg (Current)", + "Value 2": "Avg (Baseline)", + "peak": "Theoretical Max", + "peak 1": "Theoretical Max (Current)", + "peak 2": "Theoretical Max (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 175, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n],\n{ allowDiskUse: true }\n);", + "type": "table" + } + ], + "title": "Dispatch IDs - Current", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + }, + { + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 215, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Dispatch IDs - Baseline", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Speed-of-Light", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 36, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 157, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + } + }, + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "u5Z2zJhnk" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"elapsedTime1\": {\n \"$divide\": [{\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}, 1000]\n }\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"elapsedTime1\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Kernel Time Histogram", + "transparent": true, + "type": "histogram" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": 
"unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 123 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ 
+ { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Performance" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Peak FLOPs" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 213, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "L1 Cache (Bytes)" + } + ] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { 
\"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128 ]} \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n \n { \"$limit\": $TopN },\n\n 
{\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n }}\n]);", + "type": "table" + } + ], + "title": "Top Kernels", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Name", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "peak_flops": "Peak FLOPs", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + 
"options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS " + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 87 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 153 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS (Bytes)" + }, + "properties": [ + { + "id": "custom.width", + "value": 98 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + }, + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dispatch" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 251, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": 
"${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&Index\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": 
[\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", 
\"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n\n }}\n]);", + "type": "table" + } + ], + "title": "Top Dispatches", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "peak_flops": 19, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS ", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Dispatch", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", 
+ "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Kernel Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 40, + "panels": [ + { + "description": "All transaction units default to Billion, when per-sec norm is used", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 285, + "options": { + "addAllIDs": false, + "captureMappings": false, + "eventAutoComplete": true, + "eventSource": "options.animateLogo(svgmap, data);\r\nconsole.log(\"Starting render\");\r\nlet buff = data.series[0].fields[2].values.buffer;\r\nlet valueCount = buff.length;\r\nconsole.log(\"The buff is \", valueCount, \" 
long\");\r\n\r\nsvgmap.wave_life_.text(buff[0]);\r\nsvgmap.active_cu_.text(buff[1]);\r\nsvgmap.salu_.text(buff[2]);\r\nsvgmap.smem_.text(buff[3]);\r\nsvgmap.valu_.text(buff[4]);\r\nsvgmap.mfma_.text(buff[5]);\r\nsvgmap.vmem_.text(buff[6]);\r\nsvgmap.lds_.text(buff[7]);\r\nsvgmap.gws_.text(buff[8]);\r\nsvgmap.br_.text(buff[9]);\r\nsvgmap.vgpr_.text(buff[10]);\r\nsvgmap.sgpr_.text(buff[11]);\r\nsvgmap.lds_alloc_.text(buff[12]);\r\nsvgmap.scratch_alloc_.text(buff[13]);\r\nsvgmap.wavefronts_.text(buff[14]);\r\nsvgmap.workgroups_.text(buff[15]);\r\nsvgmap.lds_req_.text(buff[16]);\r\nsvgmap.il1_fetch_.text(buff[17]);\r\nsvgmap.il1_hit_.text(buff[18]);\r\nsvgmap.il1_l2_rd_.text(buff[19]);\r\nsvgmap.sl1_rd_.text(buff[20]);\r\nsvgmap.sl1_hit_.text(buff[21]);\r\nsvgmap.sl1_l2_rd_.text(buff[22]);\r\nsvgmap.sl1_l2_wr_.text(buff[23]);\r\nsvgmap.sl1_l2_atom_.text(buff[24]);\r\nsvgmap.vl1_rd_.text(buff[25]);\r\nsvgmap.vl1_wr_.text(buff[26]);\r\nsvgmap.vl1_atom_.text(buff[27]);\r\nsvgmap.vl1_hit_.text(buff[28]);\r\nsvgmap.vl1_lat_.text(buff[29]);\r\nsvgmap.vl1_l2_rd_.text(buff[30]);\r\nsvgmap.vl1_l2_wr_.text(buff[31]);\r\nsvgmap.vl1_l2_atom_.text(buff[32]);\r\nsvgmap.l2_rd_.text(buff[33]);\r\nsvgmap.l2_wr_.text(buff[34])\r\nsvgmap.l2_atom_.text(buff[35]);\r\nsvgmap.l2_hit_.text(buff[36]);\r\nsvgmap.l2_rd_lat_.text(buff[37]);\r\nsvgmap.l2_wr_lat_.text(buff[38]);\r\nsvgmap.fabric_rd_lat_.text(buff[39]);\r\nsvgmap.fabric_wr_lat_.text(buff[40]);\r\nsvgmap.fabric_atom_lat_.text(buff[41]);\r\nsvgmap.l2_fabric_rd_.text(buff[42]);\r\nsvgmap.l2_fabric_wr_.text(buff[43]);\r\nsvgmap.l2_fabric_atom_.text(buff[44]);\r\nsvgmap.hbm_rd_.text(buff[45]);\r\nsvgmap.hbm_wr_.text(buff[46]);\r\nsvgmap.lds_util_.text(buff[47]);\r\nsvgmap.vl1_coales_.text(buff[48]);\r\nsvgmap.vl1_stall_.text(buff[49]);\r\nsvgmap.wave_occ_.text(buff[50]);\r\nsvgmap.lds_lat_.text(buff[51]);\r\nsvgmap.il1_lat_.text(buff[52]);\r\nsvgmap.sl1_lat_.text(buff[53]);\r\nsvgmap.gds_req_.text(buff[54]);", + "initAutoComplete": true, 
+ "initSource": "options.animateLogo = (svgmap, data) => {\r\n \r\n}\r\n ", + "svgMappings": [ + { + "mappedName": "wave_life_", + "svgId": "wave_life" + }, + { + "mappedName": "wave_occ_", + "svgId": "wave_occ" + }, + { + "mappedName": "salu_", + "svgId": "salu" + }, + { + "mappedName": "smem_", + "svgId": "smem" + }, + { + "mappedName": "valu_", + "svgId": "valu" + }, + { + "mappedName": "mfma_", + "svgId": "mfma" + }, + { + "mappedName": "vmem_", + "svgId": "vmem" + }, + { + "mappedName": "lds_", + "svgId": "lds" + }, + { + "mappedName": "gws_", + "svgId": "gws" + }, + { + "mappedName": "br_", + "svgId": "br" + }, + { + "mappedName": "active_cu_", + "svgId": "active_cu" + }, + { + "mappedName": "vgpr_", + "svgId": "vgpr" + }, + { + "mappedName": "sgpr_", + "svgId": "sgpr" + }, + { + "mappedName": "lds_alloc_", + "svgId": "lds_alloc" + }, + { + "mappedName": "scratch_alloc_", + "svgId": "scratch_alloc" + }, + { + "mappedName": "wavefronts_", + "svgId": "wavefronts" + }, + { + "mappedName": "workgroups_", + "svgId": "workgroups" + }, + { + "mappedName": "lds_req_", + "svgId": "lds_req" + }, + { + "mappedName": "vl1_wr_", + "svgId": "vl1_wr" + }, + { + "mappedName": "vl1_atom_", + "svgId": "vl1_atom" + }, + { + "mappedName": "sl1_rd_", + "svgId": "sl1_rd" + }, + { + "mappedName": "il1_fetch_", + "svgId": "il1_fetch" + }, + { + "mappedName": "lds_lat_", + "svgId": "lds_lat" + }, + { + "mappedName": "lds_bw_", + "svgId": "lds_bw" + }, + { + "mappedName": "lds_util_", + "svgId": "lds_util" + }, + { + "mappedName": "vl1_hit_", + "svgId": "vl1_hit" + }, + { + "mappedName": "vl1_lat_", + "svgId": "vl1_lat" + }, + { + "mappedName": "vl1_coales_", + "svgId": "vl1_coales" + }, + { + "mappedName": "vl1_stall_", + "svgId": "vl1_stall" + }, + { + "mappedName": "sl1_hit_", + "svgId": "sl1_hit" + }, + { + "mappedName": "sl1_lat_", + "svgId": "sl1_lat" + }, + { + "mappedName": "il1_hit_", + "svgId": "il1_hit" + }, + { + "mappedName": "il1_lat_", + "svgId": "il1_lat" + }, + { + 
"mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "il1_l2_rd_", + "svgId": "il1_l2_rd" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "l2_rd_", + "svgId": "l2_rd" + }, + { + "mappedName": "l2_wr_", + "svgId": "l2_wr" + }, + { + "mappedName": "l2_atom_", + "svgId": "l2_atom" + }, + { + "mappedName": "l2_hit_", + "svgId": "l2_hit" + }, + { + "mappedName": "l2_rd_lat_", + "svgId": "l2_rd_lat" + }, + { + "mappedName": "l2_wr_lat_", + "svgId": "l2_wr_lat" + }, + { + "mappedName": "l2_fabric_rd_", + "svgId": "l2_fabric_rd" + }, + { + "mappedName": "l2_fabric_wr_", + "svgId": "l2_fabric_wr" + }, + { + "mappedName": "l2_fabric_atom_", + "svgId": "l2_fabric_atom" + }, + { + "mappedName": "fabric_rd_lat_", + "svgId": "fabric_rd_lat" + }, + { + "mappedName": "fabric_wr_lat_", + "svgId": "fabric_wr_lat" + }, + { + "mappedName": "fabric_atom_lat_", + "svgId": "fabric_atom_lat" + }, + { + "mappedName": "fabric_hbm_rd_", + "svgId": "fabric_hbm_rd" + }, + { + "mappedName": "fabric_hbm_wr_", + "svgId": "fabric_hbm_wr" + }, + { + "mappedName": "vl1_rd_", + "svgId": "vl1_rd" + }, + { + "mappedName": "vl1_l2_rd_", + "svgId": "vl1_l2_rd" + }, + { + "mappedName": "vl1_l2_wr_", + "svgId": "vl1_l2_wr" + }, + { + "mappedName": "vl1_l2_atom_", + "svgId": "vl1_l2_atom" + }, + { + "mappedName": "hbm_rd_", + "svgId": "hbm_rd" + }, + { + "mappedName": "hbm_wr_", + "svgId": "hbm_wr" + } + ], + "svgSource": "\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n Wave Occupancy\r\n \r\n Wave Life\r\n \r\n \r\n \r\n xGMI /\r\n PCIe\r\n \r\n GMI\r\n \r\n HBM\r\n \r\n Fabric\r\n \r\n \r\n SALU:\r\n 00000\r\n \r\n \r\n SMEM:\r\n 00000\r\n \r\n \r\n VALU:\r\n 00000\r\n \r\n \r\n MFMA:\r\n 00000\r\n \r\n \r\n VMEM:\r\n 00000\r\n \r\n \r\n LDS:\r\n 00000\r\n \r\n \r\n GWS:\r\n 00000\r\n \r\n \r\n Br:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n \r\n \r\n cycles\r\n Atomic:\r\n 00000\r\n \r\n \r\n Rd:\r\n 00000\r\n \r\n \r\n Wr:\r\n \r\n \r\n 00000\r\n \r\n \r\n Atomic:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n 00000\r\n Rd:\r\n 00000\r\n Wr:\r\n 00000\r\n Req:\r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n per-GCD\r\n cycles\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n Wave 0 Instr buff\r\n Wave N-1 Instr buff\r\n Active CUs\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Util:\r\n 00000\r\n \r\n \r\n %\r\n Coales:\r\n 00000\r\n Exec\r\n Instr Buff\r\n Instr Dispatch\r\n LDS\r\n Vector L1 Cache\r\n Scalar L1D Cache\r\n Instr L1 Cache\r\n L2 Cache\r\n 00000\r\n Req:\r\n \r\n \r\n %\r\n Stall:\r\n 00000\r\n 00000\r\n Fetch:\r\n 0000000\r\n 00000\r\n 000/000\r\n \r\n Latency\r\n \r\n LDS Alloc:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n \r\n Scratch Alloc:\r\n \r\n 00000\r\n \r\n Wavefronts:\r\n \r\n 00000\r\n \r\n Workgroups:\r\n \r\n 00000\r\n \r\n VGPRs:\r\n \r\n 00000\r\n \r\n SGPRs:\r\n \r\n 00000\r\n \r\n \r\n 00000\r\n Rd:\r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n Latency\r\n \r\n \r\n \r\n \r\n Text is not SVG - cannot display\r\n \r\n \r\n" + }, + "pluginVersion": "8.4.0", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n },\n 
\"br\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&vgpr\"\n },\n \"sgpr\": {\n \"$avg\": \"&sgpr\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&lds\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&scr\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": 
[\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$numCU\"]}\n },\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": \"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n 
\"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": \"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n 
\"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": \"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { \"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n \"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n 
{\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_LEVEL_WAVES", + "target": "$Workload1.SQ_LEVEL_WAVES.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_occ\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\",\"&GRBM_GUI_ACTIVE\"] }, $numActiveCUs]}\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Alias\": \"wave_occ_\",\n \"Value\":{ \"$round\": [\"&wave_occ\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n 
{\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "$Workload1.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"lds_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&SQ_INSTS_LDS\", 0] },\n { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\"] },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"LDS Lat\",\n \"Alias\": \"lds_lat_\",\n \"Value\":{ \"$round\": [\"&lds_lat\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_ICACHE_INFLIGHT", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"il1_lat\": 
{\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_ICACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_ICACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"IL1 Lat\",\n \t\t\t\"Alias\": \"il1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&il1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_DCACHE_INFLIGHT_LEVEL", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"sl1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_DCACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_DCACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"vL1D Lat\",\n \t\t\t\"Alias\": \"sl1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&sl1_lat\", 0] }\n \t\t}\n 
\t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + } + ], + "title": "Memory Chart (Normalization: $normUnit\")", + "transformations": [ + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "string", + "targetField": "Value" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "amd-custom-svg" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Memory Chart Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 241, + "panels": [ + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 253, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm" + }, + "name": "HBM-VLAU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 
2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2" + }, + "name": "L2-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1" + }, + "name": "vL1D-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS" + }, + "name": "LDS-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + 
"markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + 
}, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA" + }, + "name": "HBM-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + 
"line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA" + }, + "name": "L2-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA" + }, + "name": "vL1D-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA" + }, + "name": "LDS-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "rawQuery": true, + "refId": "HBM-VALU", + "target": 
"${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": 
"table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] 
},\n \"else\": \"&high_flop\"\n }\n },\n\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", 
\"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": 
\"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} 
,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n 
\"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n 
\"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": 
{\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP32/FP64 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + }, + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 312, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": 
"#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } 
+ }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_fp16" + }, + "name": "HBM-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_fp16" + }, + "name": "L2-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": 
"linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_fp16" + }, + "name": "vL1D-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_fp16" + }, + "name": "LDS-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_i8" + }, + "name": "HBM-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + 
"symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_i8" + }, + "name": "L2-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_i8" + }, + "name": "vL1D-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_i8" + }, + "name": "LDS-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + 
"rawQuery": true, + "refId": "HBM-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": 
"${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n 
\"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + 
"rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, 
\"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n 
}}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": 
"amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "jYrBWHfnz" + }, + 
"hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP16/INT8 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + } + ], + "targets": [ + { + "datasource": { + "type": 
"amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Roofline Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 2, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 13, + "x": 0, + "y": 6 + }, + "id": 6, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n 
\"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": 
\"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": 
\"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n
\"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n 
\"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": 
\"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Fetcher", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + },
+ "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 171 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 147 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 11, + "x": 13, + "y": 6 + }, + "id": 4, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n 
null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n 
\"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": 
\"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { 
\"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n 
\"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ 
\"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": 
\"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Compute", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + 
"excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Metric 1": "", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Command Processor (CPC/CPF)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 102, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 101 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 145 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 97 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 2" + 
}, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 123 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 106, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n 
\"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": 
\"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min 
(Baseline)", + "Unit 1": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 285 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 242 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 104, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": 
"${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n 
\"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { 
\"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU 
LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barriers\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": 
\"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": 
\"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 
0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barriers\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n 
\"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Resource Allocation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Shader Processor Input (SPI)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 185, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 142 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 196 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 174 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.width", + "value": 168 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min" + }, + "properties": [ + { + "id": "custom.width", + "value": 272 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 225 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "interval": "12h", + "options": { + "footer": { + "fields": "", + 
"reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n 
{\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n 
\"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n 
\"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Wavefront Launch Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true, + "Units 2": true, + "metric 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + 
"properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 223 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 34, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": 
{ \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": 
\"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", 
+ "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, 
\"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": 
[${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "Wavefront Runtime Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg": "", + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "", + "Unit 2": "" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Wavefront", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 209, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", 
+ "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 12, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { 
\"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n \n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n 
\"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector (Baseline)\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Instruction Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 24, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 24, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "limit": 100, + "values": true + }, + "showUnfilled": true, + 
"text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", 
\"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n 
\"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n 
\"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA (Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL (Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VALU Arithmetic Instr Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + 
"value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 275, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": 
[\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ 
\"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VMEM Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "VMEM Instr", + "type 1": "VMEM Instr" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 
+ } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 16, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"mmfa_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&SQ_WAVES\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mmfa_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mmfa_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mmfa_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"mmfa_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&SQ_WAVES\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mmfa_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mmfa_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mmfa_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "MFMA Arithmetic Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "MFMA Instr", + "type 1": "MFMA Instr" + } + } + } + ], + "transparent": true, + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Instruction Mix", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + 
"y": 9 + }, + "id": 8, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 211, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 
512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + 
"rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n\n \"instr_val\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] 
}] }\n }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Compute Pipeline", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "mfma_flops_bf16_pop 1": 4, + "mfma_flops_bf16_pop 2": 5, + "mfma_flops_f16_pop 1": 6, + "mfma_flops_f16_pop 2": 7, + "mfma_flops_f32_pop 1": 8, + "mfma_flops_f32_pop 2": 9, + "mfma_flops_f64_pop 1": 10, + "mfma_flops_f64_pop 2": 11, + "mfma_flops_i8_pop 1": 12, + "mfma_flops_i8_pop 2": 13, + "valu_flops_pop 1": 0, + "valu_flops_pop 2": 1, + "valu_iops_pop 1": 2, + "valu_iops_pop 2": 3 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + 
"Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "flops_pop": "FLOPs", + "flops_pop 1": "FLOPs (Current)", + "flops_pop 2": "FLOPs (Baseline)", + "iops_pop": "IOPs", + "iops_pop 1": "IOPs (Current)", + "iops_pop 2": "IOPs (Baseline)", + "mfma_flops_bf16_pop": "MFMA- BF16 (FLOPs)", + "mfma_flops_bf16_pop 1": "MFMA-BF16 (Cur)", + "mfma_flops_bf16_pop 2": "MFMA-BF16 (Baseline)", + "mfma_flops_f16_pop": "MFMA-F16 (FLOPs)", + "mfma_flops_f16_pop 1": "MFMA-F16 (Cur)", + "mfma_flops_f16_pop 2": "MFMA-F16 (Baseline)", + "mfma_flops_f32_pop": "MFMA-F32 (FLOPs)", + "mfma_flops_f32_pop 1": "MFMA-F32 (Cur)", + "mfma_flops_f32_pop 2": "MFMA-F32 (Baseline)", + "mfma_flops_f64_pop": "MFMA-F64 (FLOPs)", + "mfma_flops_f64_pop 1": "MFMA-F64 (Cur)", + "mfma_flops_f64_pop 2": "MFMA-F64 (Baseline)", + "mfma_flops_i8_pop": "MFMA-i8 (IOPs)", + "mfma_flops_i8_pop 1": "MFMA-I8 (Cur)", + "mfma_flops_i8_pop 2": "MFMA-I8 (Baseline)", + "valu_flops_pop": "VALU (FLOPs)", + "valu_flops_pop 1": "VALU FLOPs (Cur)", + "valu_flops_pop 2": "VALU FLOPs (Baseline)", + "valu_iops_pop": "VALU (IOPs)", + "valu_iops_pop 1": "VALU IOPs (Cur)", + "valu_iops_pop 2": "VALU IOPs (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + 
"overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 257, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": 
[100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n 
\"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg\": \"&avg_ipcAvg\",\n \"Min\": \"&min_ipcAvg\",\n \"Max\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg\": \"&avg_ipcIssue\",\n \"Min\": \"&min_ipcIssue\",\n \"Max\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg\": \"&avg_saluUtil\",\n \"Min\": \"&min_saluUtil\",\n \"Max\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg\": \"&avg_valuUtil\",\n \"Min\": \"&min_valuUtil\",\n \"Max\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg\": \"&avg_unpredthreads_val\",\n \"Min\": \"&min_unpredthreads_val\",\n \"Max\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg\": \"&avg_mfmaUtil\",\n \"Min\": \"&min_mfmaUtil\",\n \"Max\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg\": \"&avg_mfmaInstrCycles\",\n \"Min\": \"&min_mfmaInstrCycles\",\n \"Max\": \"&max_mfmaInstrCycles\",\n 
\"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ 
\"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", 
\"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg 2\": \"&avg_ipcAvg\",\n \"Min 2\": \"&min_ipcAvg\",\n \"Max 2\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg 2\": \"&avg_ipcIssue\",\n \"Min 2\": \"&min_ipcIssue\",\n \"Max 2\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg 2\": \"&avg_saluUtil\",\n \"Min 2\": \"&min_saluUtil\",\n \"Max 2\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg 2\": \"&avg_valuUtil\",\n \"Min 2\": \"&min_valuUtil\",\n \"Max 2\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg 2\": \"&avg_unpredthreads_val\",\n \"Min 2\": \"&min_unpredthreads_val\",\n \"Max 2\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg 2\": \"&avg_mfmaUtil\",\n \"Min 2\": \"&min_mfmaUtil\",\n \"Max 2\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg 2\": 
\"&avg_mfmaInstrCycles\",\n \"Min 2\": \"&min_mfmaInstrCycles\",\n \"Max 2\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Pipeline Stats", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg 2": "Avg (Baseline)", + "Max 2": "Max (Baseline)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 
21 + }, + "id": 96, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", 
{\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, 
\"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n 
}},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", 
{\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, 
\"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n 
}},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Arithmetic Operations", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 255, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM", + "target": "${Workload1}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ 
\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg\": \"&avg_vmemLat\",\n \"Min\": \"&min_vmemLat\",\n \"Max\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM", + "target": "${Workload1}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg\":\"&avg_smemLat\",\n \"Min\":\"&min_smemLat\",\n \"Max\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL", + "target": "${Workload1}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { 
\"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg\":\"&avg_instrFetchLat\",\n \"Min\":\"&min_instrFetchLat\",\n \"Max\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "${Workload1}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg\":\"&avg_ldsLat\",\n \"Min\":\"&min_ldsLat\",\n \"Max\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + 
}, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg 2\": \"&avg_vmemLat\",\n \"Min 2\": \"&min_vmemLat\",\n \"Max 2\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n 
\"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg 2\":\"&avg_smemLat\",\n \"Min 2\":\"&min_smemLat\",\n \"Max 2\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL2", + "target": "${Workload2}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg 2\":\"&avg_instrFetchLat\",\n \"Min 2\":\"&min_instrFetchLat\",\n \"Max 2\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + 
"refId": "SQ_INST_LEVEL_LDS2", + "target": "${Workload2}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg 2\":\"&avg_ldsLat\",\n \"Min 2\":\"&min_ldsLat\",\n \"Max 2\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + } + ], + "title": "Memory Latencies", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Compute Pipeline", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + 
"w": 24, + "x": 0, + "y": 10 + }, + "id": 98, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 205, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", 
$numCU]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n 
}\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: LDS", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Access Rate 1": 6, + "Access Rate 2": 7, + "Bandwith (Pct-of-Peak) 1": 0, + "Bandwith (Pct-of-Peak) 2": 1, + "Bank Conflict Rate 1": 2, + "Bank Conflict Rate 2": 3, + "Utilization 1": 4, + "Utilization 2": 5 + }, + "renameByName": { + "Access Rate 1": "Access Rate (Current)", + "Access Rate 2": "Access Rate (Baseline)", + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "Utilization 1": "Util (Current)", + "Utilization 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "min": -100000000000000000000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 141 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 
1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n 
\"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { 
\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": 
\"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ 
$normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { 
\"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n 
},\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": 
\"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "LDS Stats", + "transformations": [ + { + "id": "concatenate", + "options": { + "frameNameLabel": "frame", + "frameNameMode": "field" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + 
"min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Local Data Share (LDS)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 44, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 48, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", 
\"&SQC_ICACHE_MISSES_DUPLICATE\" ] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Instruction Cache ", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW (Pct-of-Peak) 1": 4, + "BW (Pct-of-Peak) 2": 5, + "Cache Hit 1": 6, + "Cache Hit 2": 7, + "Stall 1": 2, + "Stall 2": 3, + "Util 1": 0, + "Util 2": 1 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "BW Pct-of-Peak 1": "BW Pct-of-Peak 
(Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 259, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": 
[\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&avg_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&avg_hits\",\n \"Min\": \"&min_hits\",\n \"Max\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n 
\"Mean\": \"&avg_misses\",\n \"Min\": \"&min_misses\",\n \"Max\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean\": \"&avg_misses_dup\",\n \"Min\": \"&min_misses_dup\",\n \"Max\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n \n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&avg_cacheHit\",\n \"Min\": \"&min_cacheHit\",\n \"Max\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", 
\"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&avg_req\",\n \"Min 2\": \"&min_req\",\n \"Max 2\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&avg_hits\",\n \"Min 2\": \"&min_hits\",\n \"Max 2\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&avg_misses\",\n \"Min 2\" : \"&min_misses\",\n \"Max 2\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean 2\": \"&avg_misses_dup\",\n \"Min 2\": \"&min_misses_dup\",\n \"Max 2\": 
\"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&avg_cacheHit\",\n \"Min 2\": \"&min_cacheHit\",\n \"Max 2\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Instruction Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "L1I Metric": "", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Instruction Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 203, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L1K-TC BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 54, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + 
"orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "sY628IJnz" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n 
\"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Scalar L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW Pct-of-Peak 1": 0, + "BW Pct-of-Peak 2": 1, + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "Stall 1": 6, + "Stall 2": 7, + "Util 1": 4, + "Util 2": 5 + }, + "renameByName": { + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] 
+ }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 261, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n 
},\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n\n \"atomicReq_avg\": {\n \"$avg\": { 
\"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\": \"&req_min\",\n \"Max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\": \"&hits_min\",\n \"Max\": \"&hits_max\",\n \"Unit\": 
{\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean\": \"&misses_avg\",\n \"Min\": \"&misses_min\",\n \"Max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean\": \"&dup_misses_avg\",\n \"Min\": \"&dup_misses_min\",\n \"Max\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&cacheHit_avg\",\n \"Min\": \"&cacheHit_min\",\n \"Max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean\": \"&read1d_avg\",\n \"Min\": \"&read1d_min\",\n \"Max\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean\": \"&read2d_avg\",\n \"Min\": \"&read2d_min\",\n \"Max\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean\": \"&read4d_avg\",\n \"Min\": \"&read4d_min\",\n \"Max\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean\": \"&read8d_avg\",\n \"Min\": \"&read8d_min\",\n \"Max\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean\": \"&read16d_avg\",\n \"Min\": \"&read16d_min\",\n \"Max\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }}, \n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, 
\"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n 
},\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&req_avg\",\n \"Min 2\": \"&req_min\",\n \"Max 2\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&hits_avg\",\n \"Min 2\": \"&hits_min\",\n \"Max 2\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&misses_avg\",\n \"Min 2\": \"&misses_min\",\n \"Max 2\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean 2\": \"&dup_misses_avg\",\n \"Min 2\": \"&dup_misses_min\",\n \"Max 2\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", 
$normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&cacheHit_avg\",\n \"Min 2\": \"&cacheHit_min\",\n \"Max 2\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean 2\": \"&readReq_avg\",\n \"Min 2\": \"&readReq_min\",\n \"Max 2\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req (Total)\",\n \"Mean 2\": \"&writeReq_avg\",\n \"Min 2\": \"&writeReq_min\",\n \"Max 2\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean 2\": \"&atomicReq_avg\",\n \"Min 2\": \"&atomicReq_min\",\n \"Max 2\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean 2\": \"&read1d_avg\",\n \"Min 2\": \"&read1d_min\",\n \"Max 2\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean 2\": \"&read2d_avg\",\n \"Min 2\": \"&read2d_min\",\n \"Max 2\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean 2\": \"&read4d_avg\",\n \"Min 2\": \"&read4d_min\",\n \"Max 2\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean 2\": \"&read8d_avg\",\n \"Min 2\": \"&read8d_min\",\n \"Max 2\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean 2\": \"&read16d_avg\",\n \"Min 2\": \"&read16d_min\",\n \"Max 2\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + 
"indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 134 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 52, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": 
[\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": 
\"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n 
\"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache - L2 Interface", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg 
(Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Scalar L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 130, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 132, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n 
\"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, 
\"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ 
\"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n 
\"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": 
\"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cycles\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n 
},\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { 
\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", 
\"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": 
\"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": 
{\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cycles\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "TA", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + 
"decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 136 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 134, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per 
Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, 
\"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": 
[\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] 
}, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": 
[\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "TD", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max 
(Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Texture Addresser and Texture Data (TA/TD)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 112, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 165, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ 
{\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[100, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$multiply\":[\"&GRBM_GUI_ACTIVE\", $numCU, 4]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": \"&cacheBW_pct\",\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, 
\"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[100, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$multiply\":[\"&GRBM_GUI_ACTIVE\", $numCU2, 4]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": \"&cacheBW_pct\",\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Vector L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "128B Read Combining 1": 6, + "128B Read Combining 2": 7, + "Buffer Coalescing 1": 0, + "Buffer Coalescing 2": 1, + "Cache BW 1": 2, + "Cache BW 2": 3, + "Cache Hit 1": 4, + "Cache Hit 2": 5 + }, + "renameByName": { + "128B Read Combining 1": "128B Read Combining (Current)", + "128B Read Combining 2": "128B Read Combining(Baseline)", + "Buffer Coalescing 1": "Buf Coalescing (Current)", + "Buffer Coalescing 2": "Buf Coalescing (Baseline)", + "Cache BW 1": "Cache BW (Current)", + "Cache BW 2": "Cache BW (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Cache Util 1": "Cache Util (Current)", + "Cache Util 2": "Cache Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": 
{ + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "color-background" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 52 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 199 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 116, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, 
\"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, 
\"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": 
\"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, 
\"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on 
L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Stalls", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true, + "unit 2": true + }, + "indexByName": { + "Max 1": 6, + "Max 2": 7, + "Mean 1": 2, + "Mean 2": 3, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 4, + "Min 2": 5, + "unit 1": 9, + "unit 2": 8 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "unit 1": "Unit" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": 
"locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 116 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 78 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 50 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 128, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": 
{\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n 
]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n\n\n \"l2TCRRead_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2TCRRead_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2TCRRead_max\":{\"$max\": 
{\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n\n \"l2Write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2Write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2Write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n \"l2Atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2Atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2Atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] 
}]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": 
\"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-TCR Read\",\n \"avg\": \"&l2TCRRead_avg\",\n \"min\": \"&l2TCRRead_min\",\n \"max\": \"&l2TCRRead_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2Write_avg\",\n \"min\": \"&l2Write_min\",\n \"max\": \"&l2Write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2Atomic_avg\",\n \"min\": \"&l2Atomic_min\",\n \"max\": \"&l2Atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": 
\"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n 
\"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n 
]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n\n\n \"l2TCRRead_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2TCRRead_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2TCRRead_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n\n \"l2Write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2Write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2Write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n \"l2Atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2Atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2Atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ 
\"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n 
\"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": 
\"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-TCR Read\",\n \"avg\": \"&l2TCRRead_avg\",\n \"min\": \"&l2TCRRead_min\",\n \"max\": \"&l2TCRRead_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2Write_avg\",\n \"min\": \"&l2Write_min\",\n \"max\": \"&l2Write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2Atomic_avg\",\n \"min\": \"&l2Atomic_min\",\n \"max\": \"&l2Atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + 
"BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Coherency", + "GroupCols": 2, + "GroupGap": 5, + "GroupLabelColor": "#FF9830", + "GroupLabelFontSize": "100%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Xfer", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + 
"TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FADE2A", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:172", + "Col": 2, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Mean", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 120, + "pluginVersion": "8.2.1", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n\n \"readNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_avg\": {\n \"$avg\": { 
\"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \n \"writeNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \n \"atomicNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_max\": {\n \"$max\": { \"$divide\": 
[\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"NC\",\n \"Mean\": \"&readNC_avg\",\n \"Min\": \"&readNC_min\",\n \"Max\": \"&readNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"UC\",\n \"Mean\": \"&readUC_avg\",\n \"Min\": \"&readUC_min\",\n \"Max\": \"&readUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"CC\",\n \"Mean\": \"&readCC_avg\",\n \"Min\": \"&readCC_min\",\n \"Max\": \"&readCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"RW\",\n \"Mean\": \"&readRW_avg\",\n \"Min\": \"&readRW_min\",\n \"Max\": \"&readRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"RW\",\n \"Mean\": \"&writeRW_avg\",\n \"Min\": \"&writeRW_min\",\n \"Max\": \"&writeRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": 
\"NC\",\n \"Mean\": \"&writeNC_avg\",\n \"Min\": \"&writeNC_min\",\n \"Max\": \"&writeNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"UC\",\n \"Mean\": \"&writeUC_avg\",\n \"Min\": \"&writeUC_min\",\n \"Max\": \"&writeUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"CC\",\n \"Mean\": \"&writeCC_avg\",\n \"Min\": \"&writeCC_min\",\n \"Max\": \"&writeCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"NC\",\n \"Mean\": \"&atomicNC_avg\",\n \"Min\": \"&atomicNC_min\",\n \"Max\": \"&atomicNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"UC\",\n \"Mean\": \"&atomicUC_avg\",\n \"Min\": \"&atomicUC_min\",\n \"Max\": \"&atomicUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"CC\",\n \"Mean\": \"&atomicCC_avg\",\n \"Min\": \"&atomicCC_min\",\n \"Max\": \"&atomicCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"RW\",\n \"Mean\": \"&atomicRW_avg\",\n \"Min\": \"&atomicRW_min\",\n \"Max\": \"&atomicRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D - L2 Transactions Req $normUnit", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": 
"byName", + "options": "Units" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 124, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n 
{\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": 
\"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n 
\"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Addr Translation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + 
"Metric 2": true, + "Units 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Units 1": 9, + "Units 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Vector L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 56, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + }, + { + "id": "color" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Util" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 100 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"Cache Hit" + }, + "properties": [ + { + "id": "max", + "value": 100 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Wr BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 64, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": 
{ \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks2\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: L2 Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "L2 Util 1": 0, + "L2 Util 2": 1, + "L2-EA Rd BW 1": 4, + "L2-EA Rd BW 2": 5, + "L2-EA Wr BW 1": 6, + "L2-EA Wr BW 2": 7 + }, + "renameByName": { + "Cache Hit 1": "L2 Cache Hit (Current)", + "Cache Hit 2": "L2 Cache Hit (Baseline)", + "L2 Util 1": "L2 Util (Current)", + "L2 Util 2": "L2 Util (Baseline)", + "L2-EA Rd BW - GB/s 1": "L2-EA RD BW (Current)", + "L2-EA Rd BW - GB/s 2": "L2-EA RD BW (baseline)", + "L2-EA Rd BW 1": "L2-EA Rd BW (Current)", + "L2-EA Rd BW 2": "L2-EA Rd BW (Baseline)", + "L2-EA Wr BW - GB/s 1": "L2-EA WR BW (Current)", + "L2-EA Wr BW - GB/s 2": "L2-EA WR BW (Baseline)", + "L2-EA Wr BW 1": "L2-EA Wr BW (Current)", + "L2-EA Wr BW 2": "L2-EA Wr BW (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": 
"byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 62, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": 
[\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n 
\"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n 
\"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] 
}\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", 
+ "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" 
\n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", 
\"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n 
\"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n 
\"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": 
[\"Req \", $normUnit] }\n },\n\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Transactions", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 178 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + } + ] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 58, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { 
\"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n 
\"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": 
{\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": 
[\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n 
\"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n 
\"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ 
\"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": 
[ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": 
\"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": 
\"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "L2 Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Transaction", + "GroupCols": 1, + "GroupGap": 5, + "GroupLabelColor": "#FADE2A", + "GroupLabelFontSize": "120%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Metric", + "LabelColor": "#ffffff", + "LabelFontSize": "80%", + "LabelNameFilter": "", + 
"LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FF9830", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:81", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 60, + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n 
{\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"ioStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"ioStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_min\": {\n \"$min\": { 
\"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"creditStarvation_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_min\": {\n \"$min\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_max\": {\n \"$max\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n } \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_READ_avg\",\n \"Min\": \"&ioStall_READ_min\",\n \"Max\": \"&ioStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_READ_avg\",\n \"Min\": \"&gmiStall_READ_min\",\n \"Max\": \"&gmiStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_READ_avg\",\n \"Min\": \"&hbmStall_READ_min\",\n \"Max\": \"&hbmStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_WRITE_avg\",\n \"Min\": \"&ioStall_WRITE_min\",\n \"Max\": \"&ioStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Write\",\n 
\"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_WRITE_avg\",\n \"Min\": \"&gmiStall_WRITE_min\",\n \"Max\": \"&gmiStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_WRITE_avg\",\n \"Min\": \"&hbmStall_WRITE_min\",\n \"Max\": \"&hbmStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Credit Starvation\",\n \"Transaction\": \"Write\",\n \"Target\": \"Fabric\",\n \"Avg\": \"&creditStarvation_avg\",\n \"Min\": \"&creditStarvation_min\",\n \"Max\": \"&creditStarvation_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Interface Stalls (Cycles $normUnit)", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 66, + "panels": [ + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": 
"#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", 
+ "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 87, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 0 - 15) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + 
"RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 92, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "Cache Hit Rate % (Channel 16 - 31) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + 
"HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:565", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 81, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": 
"A" + } + ], + "title": "L1 - L2 Read Requests(Channel 0-15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + 
"ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:656", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 82, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L 2 Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, 
+ "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:697", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 83, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": 
"Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + 
"ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:750", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 84, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + 
"LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 85, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": 
false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 
2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 91, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + 
"MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 189, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + 
"GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + 
"datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 195, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + 
"RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 191, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + 
"HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 197, + "maxDataPoints": 10, + "targets": [ + { + 
"datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + 
"ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 193, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Request (Channel 0 - 15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + 
"LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 199, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Requests (Channel 16-31): $normUnit", + "type": 
"michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "", + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": 
false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 73 + }, + "hideTimeOverride": false, + "id": 68, + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b0_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[0]\"]}, \n { \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b0_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[0]\"}, \"&denom\"] } \n },\n \"b0_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[0]\"}, \"&denom\"] } \n },\n 
\"b0_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[0]\"}, \"&denom\"] } \n },\n \"b0_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[0]\"}, \"&denom\"] } \n },\n \"b0_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[0]\"}, \"&denom\"] }\n },\n \"b0_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[0]\"}, \"&denom\"] } \n },\n \"b0_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[0]\"}, \"&denom\"] } \n },\n\n \"b0_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[0]\", \"&TCC_EA_RDREQ[0]\"]}, null] } },\n \"b0_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[0]\", \"&TCC_EA_WRREQ[0]\"]}, null] } },\n \"b0_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[0]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[0]\", \"&TCC_EA_ATOMIC[0]\"]}, null]}},\n\n \"b0_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"}, \"&denom\"] }},\n\n \n \"b1_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": 
[\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[1]\"]}, \n { \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b1_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[1]\"}, \"&denom\"] } \n },\n \"b1_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[1]\"}, \"&denom\"] } \n },\n \"b1_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[1]\"}, \"&denom\"] } \n },\n \"b1_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[1]\"}, \"&denom\"] }\n },\n \"b1_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[1]\"}, \"&denom\"] } \n },\n \"b1_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[1]\", \"&TCC_EA_RDREQ[1]\"]}, null] } },\n \"b1_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[1]\", \"&TCC_EA_WRREQ[1]\"]}, null] } },\n \"b1_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[1]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[1]\", \"&TCC_EA_ATOMIC[1]\"]}, null]}},\n\n \"b1_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"}, \"&denom\"] }},\n\n\n \"b2_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[2]\"]}, \n { \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b2_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[2]\"}, \"&denom\"] }\n },\n \"b2_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[2]\"}, \"&denom\"] } \n },\n \"b2_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[2]\"}, \"&denom\"] }\n },\n \"b2_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[2]\", \"&TCC_EA_RDREQ[2]\"]}, null] } },\n \"b2_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[2]\", \"&TCC_EA_WRREQ[2]\"]}, null] } },\n \"b2_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[2]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[2]\", \"&TCC_EA_ATOMIC[2]\"]}, null]}},\n\n \"b2_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"}, \"&denom\"] }},\n\n\n \n \"b3_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[3]\"]}, \n { \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b3_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[3]\"}, \"&denom\"] } \n },\n \"b3_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[3]\"}, \"&denom\"] } \n },\n \"b3_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[3]\"}, \"&denom\"] }\n },\n \"b3_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[3]\"}, \"&denom\"] }\n },\n \"b3_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[3]\"}, \"&denom\"] } \n },\n \"b3_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[3]\", \"&TCC_EA_RDREQ[3]\"]}, null] } },\n \"b3_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[3]\", \"&TCC_EA_WRREQ[3]\"]}, 
null] } },\n \"b3_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[3]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[3]\", \"&TCC_EA_ATOMIC[3]\"]}, null]}},\n\n \"b3_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"}, \"&denom\"] }},\n\n\n \n \"b4_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[4]\"]}, \n { \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b4_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[4]\"}, \"&denom\"] } \n },\n \"b4_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[4]\"}, \"&denom\"] } \n },\n \"b4_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[4]\"}, \"&denom\"] }\n },\n \"b4_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaAtomicReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[4]\", \"&TCC_EA_RDREQ[4]\"]}, null] } },\n \"b4_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[4]\", \"&TCC_EA_WRREQ[4]\"]}, null] } },\n \"b4_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[4]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[4]\", \"&TCC_EA_ATOMIC[4]\"]}, null]}},\n\n \"b4_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"}, \"&denom\"] }},\n\n\n \n \"b5_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[5]\"]}, \n { \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b5_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[5]\"}, \"&denom\"] } \n },\n \"b5_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[5]\"}, \"&denom\"] } \n },\n \"b5_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_WRITE[5]\"}, \"&denom\"] } \n },\n \"b5_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[5]\", \"&TCC_EA_RDREQ[5]\"]}, null] } },\n \"b5_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[5]\", \"&TCC_EA_WRREQ[5]\"]}, null] } },\n \"b5_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[5]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[5]\", \"&TCC_EA_ATOMIC[5]\"]}, null]}},\n\n \"b5_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"}, \"&denom\"] }},\n\n\n \n \"b6_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }, 0]}, \n 
{\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[6]\"]}, \n { \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b6_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[6]\"}, \"&denom\"] } \n },\n \"b6_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[6]\"}, \"&denom\"] } \n },\n \"b6_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[6]\"}, \"&denom\"] } \n },\n \"b6_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[6]\"}, \"&denom\"] } \n },\n \"b6_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[6]\"}, \"&denom\"] }\n },\n \"b6_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[6]\", \"&TCC_EA_RDREQ[6]\"]}, null] } },\n \"b6_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[6]\", \"&TCC_EA_WRREQ[6]\"]}, null] } },\n \"b6_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[6]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[6]\", \"&TCC_EA_ATOMIC[6]\"]}, null]}},\n\n \"b6_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n 
\"b6_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"}, \"&denom\"] }},\n\n\n \n \"b7_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[7]\"]}, \n { \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b7_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[7]\"}, \"&denom\"] } \n },\n \"b7_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[7]\"}, \"&denom\"] } \n },\n \"b7_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[7]\"}, \"&denom\"] } \n },\n \"b7_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[7]\"}, \"&denom\"] } \n },\n \"b7_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[7]\"}, \"&denom\"] }\n },\n \"b7_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[7]\", \"&TCC_EA_RDREQ[7]\"]}, null] } },\n \"b7_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[7]\", \"&TCC_EA_WRREQ[7]\"]}, null] } },\n \"b7_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[7]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[7]\", \"&TCC_EA_ATOMIC[7]\"]}, null]}},\n\n \"b7_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n 
\"b7_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"}, \"&denom\"] }},\n\n\n \n \"b8_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[8]\"]}, \n { \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b8_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[8]\"}, \"&denom\"] } \n },\n \"b8_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[8]\"}, \"&denom\"] } \n },\n \"b8_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[8]\"}, \"&denom\"] } \n },\n \"b8_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[8]\", \"&TCC_EA_RDREQ[8]\"]}, null] } },\n \"b8_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[8]\", \"&TCC_EA_WRREQ[8]\"]}, null] } },\n \"b8_eaAtomicLat\": { \"$avg\": 
{\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[8]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[8]\", \"&TCC_EA_ATOMIC[8]\"]}, null]}},\n\n \"b8_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"}, \"&denom\"] }},\n\n\n \n \"b9_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[9]\"]}, \n { \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b9_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[9]\"}, \"&denom\"] } \n },\n \"b9_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[9]\"}, \"&denom\"] } \n },\n \"b9_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[9]\"}, \"&denom\"] } \n },\n \"b9_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[9]\", \"&TCC_EA_RDREQ[9]\"]}, null] } },\n \"b9_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[9]\", \"&TCC_EA_WRREQ[9]\"]}, null] } },\n \"b9_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[9]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[9]\", \"&TCC_EA_ATOMIC[9]\"]}, null]}},\n\n \"b9_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"}, \"&denom\"] }},\n\n\n \n \"b10_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[10]\"]}, \n { \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b10_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[10]\"}, \"&denom\"] } \n },\n \"b10_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[10]\"}, \"&denom\"] } \n },\n \"b10_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[10]\"}, 
\"&denom\"] } \n },\n \"b10_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[10]\", \"&TCC_EA_RDREQ[10]\"]}, null] } },\n \"b10_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[10]\", \"&TCC_EA_WRREQ[10]\"]}, null] } },\n \"b10_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[10]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[10]\", \"&TCC_EA_ATOMIC[10]\"]}, null]}},\n\n \"b10_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"}, \"&denom\"] }},\n\n\n \n \"b11_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }, 
0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[11]\"]}, \n { \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b11_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[11]\"}, \"&denom\"] } \n },\n \"b11_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[11]\"}, \"&denom\"] } \n },\n \"b11_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[11]\"}, \"&denom\"] } \n },\n \"b11_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[11]\", \"&TCC_EA_RDREQ[11]\"]}, null] } },\n \"b11_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[11]\", \"&TCC_EA_WRREQ[11]\"]}, null] } },\n \"b11_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[11]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[11]\", \"&TCC_EA_ATOMIC[11]\"]}, null]}},\n\n \"b11_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"}, \"&denom\"] }},\n\n\n \n \"b12_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[12]\"]}, \n { \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b12_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[12]\"}, \"&denom\"] } \n },\n \"b12_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[12]\"}, \"&denom\"] } \n },\n \"b12_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[12]\"}, \"&denom\"] } \n },\n \"b12_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[12]\", \"&TCC_EA_RDREQ[12]\"]}, null] } },\n \"b12_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[12]\", \"&TCC_EA_WRREQ[12]\"]}, null] } },\n \"b12_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[12]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[12]\", \"&TCC_EA_ATOMIC[12]\"]}, null]}},\n\n \"b12_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_gmi_credit\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"}, \"&denom\"] }},\n\n\n \n \"b13_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[13]\"]}, \n { \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b13_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[13]\"}, \"&denom\"] } \n },\n \"b13_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[13]\"}, \"&denom\"] } \n },\n \"b13_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[13]\"}, \"&denom\"] } \n },\n \"b13_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[13]\"}, \"&denom\"] } \n },\n \"b13_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[13]\"}, \"&denom\"] }\n },\n \"b13_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[13]\", \"&TCC_EA_RDREQ[13]\"]}, null] } },\n \"b13_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_WRREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[13]\", \"&TCC_EA_WRREQ[13]\"]}, null] } },\n \"b13_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[13]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[13]\", \"&TCC_EA_ATOMIC[13]\"]}, null]}},\n\n \"b13_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"}, \"&denom\"] }},\n\n\n \n \"b14_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[14]\"]}, \n { \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b14_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[14]\"}, \"&denom\"] } \n },\n \"b14_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[14]\"}, \"&denom\"] } \n },\n \"b14_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[14]\"}, \"&denom\"] } \n },\n \"b14_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[14]\"}, \"&denom\"] } \n },\n 
\"b14_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[14]\", \"&TCC_EA_RDREQ[14]\"]}, null] } },\n \"b14_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[14]\", \"&TCC_EA_WRREQ[14]\"]}, null] } },\n \"b14_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[14]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[14]\", \"&TCC_EA_ATOMIC[14]\"]}, null]}},\n\n \"b14_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"}, \"&denom\"] }},\n\n\n \n \"b15_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[15]\"]}, \n { \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b15_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[15]\"}, 
\"&denom\"] } \n },\n \"b15_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[15]\"}, \"&denom\"] } \n },\n \"b15_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[15]\"}, \"&denom\"] } \n },\n \"b15_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[15]\"}, \"&denom\"] } \n },\n \"b15_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[15]\"}, \"&denom\"] }\n },\n \"b15_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[15]\", \"&TCC_EA_RDREQ[15]\"]}, null] } },\n \"b15_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[15]\", \"&TCC_EA_WRREQ[15]\"]}, null] } },\n \"b15_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[15]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[15]\", \"&TCC_EA_ATOMIC[15]\"]}, null]}},\n\n \"b15_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_too_many\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"}, \"&denom\"] }}\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"0\",\n \"Hit Rate\": \"&b0_hitRate\",\n \"Req\": \"&b0_req\",\n \"Read Req\": \"&b0_readReq\",\n \"Write Req\": \"&b0_writeReq\",\n \"AtomicReq\": \"&b0_atomicReq\",\n \"EA Read Req\": \"&b0_eaReadReq\",\n \"EA Write Req\": \"&b0_eaWriteReq\",\n \"EA AtomicReq\": \"&b0_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b0_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b0_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b0_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b0_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b0_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b0_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b0_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b0_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b0_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b0_ea_write_stall_too_many\"\n },\n {\n \"Channel\": \"1\",\n \"Hit Rate\": \"&b1_hitRate\",\n \"Req\": \"&b1_req\",\n \"Read Req\": \"&b1_readReq\",\n \"Write Req\": \"&b1_writeReq\",\n \"AtomicReq\": \"&b1_atomicReq\",\n \"EA Read Req\": \"&b1_eaReadReq\",\n \"EA Write Req\": \"&b1_eaWriteReq\",\n \"EA AtomicReq\": \"&b1_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b1_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b1_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b1_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b1_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b1_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b1_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b1_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b1_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b1_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b1_ea_write_stall_too_many\"\n },\n {\n \"Channel\": \"2\",\n \"Hit Rate\": \"&b2_hitRate\",\n \"Req\": \"&b2_req\",\n \"Read 
Req\": \"&b2_readReq\",\n \"Write Req\": \"&b2_writeReq\",\n \"AtomicReq\": \"&b2_atomicReq\",\n \"EA Read Req\": \"&b2_eaReadReq\",\n \"EA Write Req\": \"&b2_eaWriteReq\",\n \"EA AtomicReq\": \"&b2_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b2_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b2_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b2_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b2_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b2_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b2_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b2_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b2_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b2_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b2_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"3\",\n \"Hit Rate\": \"&b3_hitRate\",\n \"Req\": \"&b3_req\",\n \"Read Req\": \"&b3_readReq\",\n \"Write Req\": \"&b3_writeReq\",\n \"AtomicReq\": \"&b3_atomicReq\",\n \"EA Read Req\": \"&b3_eaReadReq\",\n \"EA Write Req\": \"&b3_eaWriteReq\",\n \"EA AtomicReq\": \"&b3_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b3_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b3_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b3_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b3_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b3_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b3_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b3_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b3_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b3_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b3_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"4\",\n \"Hit Rate\": \"&b4_hitRate\",\n \"Req\": \"&b4_req\",\n \"Read Req\": \"&b4_readReq\",\n \"Write Req\": \"&b4_writeReq\",\n \"AtomicReq\": \"&b4_atomicReq\",\n \"EA Read Req\": \"&b4_eaReadReq\",\n \"EA Write Req\": \"&b4_eaWriteReq\",\n \"EA AtomicReq\": 
\"&b4_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b4_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b4_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b4_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b4_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b4_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b4_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b4_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b4_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b4_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b4_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"5\",\n \"Hit Rate\": \"&b5_hitRate\",\n \"Req\": \"&b5_req\",\n \"Read Req\": \"&b5_readReq\",\n \"Write Req\": \"&b5_writeReq\",\n \"AtomicReq\": \"&b5_atomicReq\",\n \"EA Read Req\": \"&b5_eaReadReq\",\n \"EA Write Req\": \"&b5_eaWriteReq\",\n \"EA AtomicReq\": \"&b5_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b5_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b5_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b5_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b5_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b5_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b5_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b5_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b5_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b5_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b5_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"6\",\n \"Hit Rate\": \"&b6_hitRate\",\n \"Req\": \"&b6_req\",\n \"Read Req\": \"&b6_readReq\",\n \"Write Req\": \"&b6_writeReq\",\n \"AtomicReq\": \"&b6_atomicReq\",\n \"EA Read Req\": \"&b6_eaReadReq\",\n \"EA Write Req\": \"&b6_eaWriteReq\",\n \"EA AtomicReq\": \"&b6_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b6_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b6_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b6_eaAtomicLat\",\n \"EA Read Stall - IO\": 
\"&b6_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b6_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b6_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b6_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b6_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b6_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b6_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"7\",\n \"Hit Rate\": \"&b7_hitRate\",\n \"Req\": \"&b7_req\",\n \"Read Req\": \"&b7_readReq\",\n \"Write Req\": \"&b7_writeReq\",\n \"AtomicReq\": \"&b7_atomicReq\",\n \"EA Read Req\": \"&b7_eaReadReq\",\n \"EA Write Req\": \"&b7_eaWriteReq\",\n \"EA AtomicReq\": \"&b7_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b7_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b7_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b7_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b7_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b7_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b7_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b7_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b7_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b7_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b7_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"8\",\n \"Hit Rate\": \"&b8_hitRate\",\n \"Req\": \"&b8_req\",\n \"Read Req\": \"&b8_readReq\",\n \"Write Req\": \"&b8_writeReq\",\n \"AtomicReq\": \"&b8_atomicReq\",\n \"EA Read Req\": \"&b8_eaReadReq\",\n \"EA Write Req\": \"&b8_eaWriteReq\",\n \"EA AtomicReq\": \"&b8_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b8_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b8_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b8_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b8_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b8_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b8_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": 
\"&b8_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b8_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b8_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b8_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"9\",\n \"Hit Rate\": \"&b9_hitRate\",\n \"Req\": \"&b9_req\",\n \"Read Req\": \"&b9_readReq\",\n \"Write Req\": \"&b9_writeReq\",\n \"AtomicReq\": \"&b9_atomicReq\",\n \"EA Read Req\": \"&b9_eaReadReq\",\n \"EA Write Req\": \"&b9_eaWriteReq\",\n \"EA AtomicReq\": \"&b9_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b9_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b9_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b9_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b9_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b9_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b9_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b9_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b9_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b9_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b9_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"10\",\n \"Hit Rate\": \"&b10_hitRate\",\n \"Req\": \"&b10_req\",\n \"Read Req\": \"&b10_readReq\",\n \"Write Req\": \"&b10_writeReq\",\n \"AtomicReq\": \"&b10_atomicReq\",\n \"EA Read Req\": \"&b10_eaReadReq\",\n \"EA Write Req\": \"&b10_eaWriteReq\",\n \"EA AtomicReq\": \"&b10_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b10_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b10_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b10_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b10_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b10_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b10_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b10_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b10_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b10_ea_write_stall_dram_credit\",\n \"EA Write Stall - 
Starve\": \"&b10_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"11\",\n \"Hit Rate\": \"&b11_hitRate\",\n \"Req\": \"&b11_req\",\n \"Read Req\": \"&b11_readReq\",\n \"Write Req\": \"&b11_writeReq\",\n \"AtomicReq\": \"&b11_atomicReq\",\n \"EA Read Req\": \"&b11_eaReadReq\",\n \"EA Write Req\": \"&b11_eaWriteReq\",\n \"EA AtomicReq\": \"&b11_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b11_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b11_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b11_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b11_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b11_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b11_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b11_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b11_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b11_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b11_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"12\",\n \"Hit Rate\": \"&b12_hitRate\",\n \"Req\": \"&b12_req\",\n \"Read Req\": \"&b12_readReq\",\n \"Write Req\": \"&b12_writeReq\",\n \"AtomicReq\": \"&b12_atomicReq\",\n \"EA Read Req\": \"&b12_eaReadReq\",\n \"EA Write Req\": \"&b12_eaWriteReq\",\n \"EA AtomicReq\": \"&b12_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b12_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b12_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b12_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b12_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b12_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b12_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b12_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b12_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b12_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b12_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"13\",\n \"Hit Rate\": \"&b13_hitRate\",\n \"Req\": \"&b13_req\",\n \"Read Req\": \"&b13_readReq\",\n 
\"Write Req\": \"&b13_writeReq\",\n \"AtomicReq\": \"&b13_atomicReq\",\n \"EA Read Req\": \"&b13_eaReadReq\",\n \"EA Write Req\": \"&b13_eaWriteReq\",\n \"EA AtomicReq\": \"&b13_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b13_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b13_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b13_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b13_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b13_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b13_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b13_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b13_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b13_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b13_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"14\",\n \"Hit Rate\": \"&b14_hitRate\",\n \"Req\": \"&b14_req\",\n \"Read Req\": \"&b14_readReq\",\n \"Write Req\": \"&b14_writeReq\",\n \"AtomicReq\": \"&b14_atomicReq\",\n \"EA Read Req\": \"&b14_eaReadReq\",\n \"EA Write Req\": \"&b14_eaWriteReq\",\n \"EA AtomicReq\": \"&b14_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b14_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b14_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b14_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b14_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b14_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b14_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b14_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b14_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b14_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b14_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"15\",\n \"Hit Rate\": \"&b15_hitRate\",\n \"Req\": \"&b15_req\",\n \"Read Req\": \"&b15_readReq\",\n \"Write Req\": \"&b15_writeReq\",\n \"AtomicReq\": \"&b15_atomicReq\",\n \"EA Read Req\": \"&b15_eaReadReq\",\n \"EA Write Req\": \"&b15_eaWriteReq\",\n \"EA 
AtomicReq\": \"&b15_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b15_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b15_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b15_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b15_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b15_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b15_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b15_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b15_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b15_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b15_ea_write_stall_too_many\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + 
"LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 73 + }, + "id": 70, + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": 
\"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b16_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[16]\"]}, \n { \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b16_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[16]\"}, \"&denom\"] } \n },\n \"b16_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[16]\"}, \"&denom\"] } \n },\n \"b16_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[16]\"}, \"&denom\"] } \n },\n \"b16_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[16]\"}, \"&denom\"] } \n },\n \"b16_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[16]\"}, \"&denom\"] }\n },\n \"b16_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[16]\"}, \"&denom\"] } \n },\n \"b16_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[16]\"}, \"&denom\"] } \n },\n\n \"b16_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[16]\", \"&TCC_EA_RDREQ[16]\"]}, null] } },\n \"b16_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[16]\", \"&TCC_EA_WRREQ[16]\"]}, null] } },\n \"b16_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[16]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[16]\", \"&TCC_EA_ATOMIC[16]\"]}, null]}},\n \"b16_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_gmi_credit\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"}, \"&denom\"] }},\n\n \n \"b17_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[17]\"]}, \n { \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b17_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[17]\"}, \"&denom\"] } \n },\n \"b17_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[17]\"}, \"&denom\"] } \n },\n \"b17_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[17]\"}, \"&denom\"] } \n },\n \"b17_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[17]\"}, \"&denom\"] }\n },\n \"b17_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[17]\"}, \"&denom\"] } \n },\n \"b17_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[17]\", \"&TCC_EA_RDREQ[17]\"]}, null] } },\n \"b17_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_WRREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[17]\", \"&TCC_EA_WRREQ[17]\"]}, null] } },\n \"b17_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[17]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[17]\", \"&TCC_EA_ATOMIC[17]\"]}, null]}},\n \"b17_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"}, \"&denom\"] }},\n\n \n \"b18_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[18]\"]}, \n { \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b18_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[18]\"}, \"&denom\"] }\n },\n \"b18_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[18]\"}, \"&denom\"] } \n },\n \"b18_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[18]\"}, \"&denom\"] }\n },\n \"b18_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[18]\"}, \"&denom\"] }\n },\n 
\"b18_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[18]\", \"&TCC_EA_RDREQ[18]\"]}, null] } },\n \"b18_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[18]\", \"&TCC_EA_WRREQ[18]\"]}, null] } },\n \"b18_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[18]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[18]\", \"&TCC_EA_ATOMIC[18]\"]}, null]}},\n \"b18_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"}, \"&denom\"] }},\n\n \n \"b19_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[19]\"]}, \n { \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b19_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[19]\"}, \"&denom\"] 
} \n },\n \"b19_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[19]\"}, \"&denom\"] } \n },\n \"b19_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[19]\"}, \"&denom\"] }\n },\n \"b19_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[19]\"}, \"&denom\"] }\n },\n \"b19_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[19]\"}, \"&denom\"] } \n },\n \"b19_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[19]\", \"&TCC_EA_RDREQ[19]\"]}, null] } },\n \"b19_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[19]\", \"&TCC_EA_WRREQ[19]\"]}, null] } },\n \"b19_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[19]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[19]\", \"&TCC_EA_ATOMIC[19]\"]}, null]}},\n \"b19_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"}, \"&denom\"] }},\n\n \n \"b20_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[20]\"]}, \n { \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b20_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[20]\"}, \"&denom\"] } \n },\n \"b20_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[20]\"}, \"&denom\"] } \n },\n \"b20_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[20]\"}, \"&denom\"] }\n },\n \"b20_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[20]\", \"&TCC_EA_RDREQ[20]\"]}, null] } },\n \"b20_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[20]\", \"&TCC_EA_WRREQ[20]\"]}, null] } },\n \"b20_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[20]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[20]\", \"&TCC_EA_ATOMIC[20]\"]}, null]}},\n \"b20_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"}, \"&denom\"] }},\n\n \n\n \"b21_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[21]\"]}, \n { \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b21_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[21]\"}, \"&denom\"] } \n },\n \"b21_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[21]\"}, \"&denom\"] } \n },\n \"b21_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[21]\"}, \"&denom\"] } \n },\n \"b21_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[21]\", \"&TCC_EA_RDREQ[21]\"]}, null] } },\n \"b21_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[21]\", \"&TCC_EA_WRREQ[21]\"]}, null] } },\n \"b21_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[21]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[21]\", \"&TCC_EA_ATOMIC[21]\"]}, null]}},\n 
\"b21_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"}, \"&denom\"] }},\n\n \n\n \"b22_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[22]\"]}, \n { \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b22_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[22]\"}, \"&denom\"] } \n },\n \"b22_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[22]\"}, \"&denom\"] } \n },\n \"b22_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[22]\"}, \"&denom\"] } \n },\n \"b22_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[22]\"}, \"&denom\"] } \n },\n \"b22_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[22]\"}, \"&denom\"] }\n },\n \"b22_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[22]\", \"&TCC_EA_RDREQ[22]\"]}, null] } },\n \"b22_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[22]\", \"&TCC_EA_WRREQ[22]\"]}, null] } },\n \"b22_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[22]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[22]\", \"&TCC_EA_ATOMIC[22]\"]}, null]}},\n \"b22_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"}, \"&denom\"] }},\n\n \n\n \"b23_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[23]\"]}, \n { \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b23_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[23]\"}, \"&denom\"] } \n },\n \"b23_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[23]\"}, \"&denom\"] } \n },\n \"b23_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[23]\"}, \"&denom\"] } \n },\n \"b23_atomicReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[23]\"}, \"&denom\"] } \n },\n \"b23_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[23]\"}, \"&denom\"] }\n },\n \"b23_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[23]\", \"&TCC_EA_RDREQ[23]\"]}, null] } },\n \"b23_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[23]\", \"&TCC_EA_WRREQ[23]\"]}, null] } },\n \"b23_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[23]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[23]\", \"&TCC_EA_ATOMIC[23]\"]}, null]}},\n \"b23_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"}, \"&denom\"] }},\n\n \n \"b24_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[24]\"]}, \n { \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b24_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[24]\"}, \"&denom\"] } \n },\n \"b24_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[24]\"}, \"&denom\"] } \n },\n \"b24_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[24]\"}, \"&denom\"] } \n },\n \"b24_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[24]\", \"&TCC_EA_RDREQ[24]\"]}, null] } },\n \"b24_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[24]\", \"&TCC_EA_WRREQ[24]\"]}, null] } },\n \"b24_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[24]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[24]\", \"&TCC_EA_ATOMIC[24]\"]}, null]}},\n \"b24_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n 
\"b24_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"}, \"&denom\"] }},\n\n \n \"b25_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[25]\"]}, \n { \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b25_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[25]\"}, \"&denom\"] } \n },\n \"b25_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[25]\"}, \"&denom\"] } \n },\n \"b25_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[25]\"}, \"&denom\"] } \n },\n \"b25_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[25]\", \"&TCC_EA_RDREQ[25]\"]}, null] } },\n \"b25_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[25]\", \"&TCC_EA_WRREQ[25]\"]}, null] } },\n \"b25_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[25]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[25]\", \"&TCC_EA_ATOMIC[25]\"]}, null]}},\n \"b25_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"}, \"&denom\"] }},\n\n \n \"b26_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[26]\"]}, \n { \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b26_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[26]\"}, \"&denom\"] } \n },\n \"b26_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[26]\"}, \"&denom\"] } \n },\n \"b26_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[26]\"}, \"&denom\"] } \n },\n \"b26_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[26]\", \"&TCC_EA_RDREQ[26]\"]}, null] } },\n \"b26_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[26]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[26]\", \"&TCC_EA_WRREQ[26]\"]}, null] } },\n \"b26_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[26]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[26]\", \"&TCC_EA_ATOMIC[26]\"]}, null]}},\n \"b26_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"}, \"&denom\"] }},\n\n \n \"b27_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[27]\"]}, \n { \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b27_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[27]\"}, \"&denom\"] } \n },\n \"b27_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[27]\"}, \"&denom\"] } \n },\n \"b27_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[27]\"}, \"&denom\"] } \n },\n \"b27_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[27]\", \"&TCC_EA_RDREQ[27]\"]}, null] } },\n \"b27_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[27]\", \"&TCC_EA_WRREQ[27]\"]}, null] } },\n \"b27_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[27]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[27]\", \"&TCC_EA_ATOMIC[27]\"]}, null]}},\n \"b27_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"}, \"&denom\"] }},\n\n \n \"b28_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[28]\"]}, \n { \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b28_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[28]\"}, \"&denom\"] } \n },\n \"b28_readReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[28]\"}, \"&denom\"] } \n },\n \"b28_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[28]\"}, \"&denom\"] } \n },\n \"b28_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[28]\", \"&TCC_EA_RDREQ[28]\"]}, null] } },\n \"b28_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[28]\", \"&TCC_EA_WRREQ[28]\"]}, null] } },\n \"b28_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[28]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[28]\", \"&TCC_EA_ATOMIC[28]\"]}, null]}},\n \"b28_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"}, \"&denom\"] }},\n\n \n \"b29_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[29]\"]}, \n { \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b29_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[29]\"}, \"&denom\"] } \n },\n \"b29_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[29]\"}, \"&denom\"] } \n },\n \"b29_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[29]\"}, \"&denom\"] } \n },\n \"b29_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[29]\"}, \"&denom\"] } \n },\n \"b29_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[29]\"}, \"&denom\"] }\n },\n \"b29_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[29]\", \"&TCC_EA_RDREQ[29]\"]}, null] } },\n \"b29_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[29]\", \"&TCC_EA_WRREQ[29]\"]}, null] } },\n \"b29_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[29]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[29]\", \"&TCC_EA_ATOMIC[29]\"]}, null]}},\n \"b29_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"}, \"&denom\"] }},\n\n \n \"b30_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[30]\"]}, \n { \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b30_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[30]\"}, \"&denom\"] } \n },\n \"b30_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[30]\"}, \"&denom\"] } \n },\n \"b30_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[30]\"}, \"&denom\"] } \n },\n \"b30_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[30]\", \"&TCC_EA_RDREQ[30]\"]}, null] } },\n \"b30_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[30]\", \"&TCC_EA_WRREQ[30]\"]}, null] } },\n \"b30_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[30]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[30]\", \"&TCC_EA_ATOMIC[30]\"]}, null]}},\n 
\"b30_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"}, \"&denom\"] }},\n\n \n \"b31_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[31]\"]}, \n { \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b31_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[31]\"}, \"&denom\"] } \n },\n \"b31_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[31]\"}, \"&denom\"] } \n },\n \"b31_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[31]\"}, \"&denom\"] } \n },\n \"b31_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[31]\"}, \"&denom\"] } \n },\n \"b31_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}, \"&denom\"] }\n },\n \"b31_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[31]\", \"&TCC_EA_RDREQ[31]\"]}, null] } },\n \"b31_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[31]\", \"&TCC_EA_WRREQ[31]\"]}, null] } },\n \"b31_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[31]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[31]\", \"&TCC_EA_ATOMIC[31]\"]}, null]}},\n \"b31_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}, \"&denom\"] }}\n\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"16\",\n \"Hit Rate\": \"&b16_hitRate\",\n \"Req\": \"&b16_req\",\n \"Read Req\": \"&b16_readReq\",\n \"Write Req\": \"&b16_writeReq\",\n \"AtomicReq\": \"&b16_atomicReq\",\n \"EA Read Req\": \"&b16_eaReadReq\",\n \"EA Write Req\": \"&b16_eaWriteReq\",\n \"EA AtomicReq\": \"&b16_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b16_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b16_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b16_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b16_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b16_ea_read_stall_gmi_credit\",\n 
\"EA Read Stall - DRAM\": \"&b16_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b16_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b16_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b16_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b16_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"17\",\n \"Hit Rate\": \"&b17_hitRate\",\n \"Req\": \"&b17_req\",\n \"Read Req\": \"&b17_readReq\",\n \"Write Req\": \"&b17_writeReq\",\n \"AtomicReq\": \"&b17_atomicReq\",\n \"EA Read Req\": \"&b17_eaReadReq\",\n \"EA Write Req\": \"&b17_eaWriteReq\",\n \"EA AtomicReq\": \"&b17_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b17_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b17_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b17_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b17_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b17_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b17_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b17_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b17_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b17_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b17_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"18\",\n \"Hit Rate\": \"&b18_hitRate\",\n \"Req\": \"&b18_req\",\n \"Read Req\": \"&b18_readReq\",\n \"Write Req\": \"&b18_writeReq\",\n \"AtomicReq\": \"&b18_atomicReq\",\n \"EA Read Req\": \"&b18_eaReadReq\",\n \"EA Write Req\": \"&b18_eaWriteReq\",\n \"EA AtomicReq\": \"&b18_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b18_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b18_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b18_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b18_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b18_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b18_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b18_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": 
\"&b18_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b18_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b18_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"19\",\n \"Hit Rate\": \"&b19_hitRate\",\n \"Req\": \"&b19_req\",\n \"Read Req\": \"&b19_readReq\",\n \"Write Req\": \"&b19_writeReq\",\n \"AtomicReq\": \"&b19_atomicReq\",\n \"EA Read Req\": \"&b19_eaReadReq\",\n \"EA Write Req\": \"&b19_eaWriteReq\",\n \"EA AtomicReq\": \"&b19_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b19_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b19_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b19_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b19_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b19_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b19_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b19_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b19_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b19_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b19_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"20\",\n \"Hit Rate\": \"&b20_hitRate\",\n \"Req\": \"&b20_req\",\n \"Read Req\": \"&b20_readReq\",\n \"Write Req\": \"&b20_writeReq\",\n \"AtomicReq\": \"&b20_atomicReq\",\n \"EA Read Req\": \"&b20_eaReadReq\",\n \"EA Write Req\": \"&b20_eaWriteReq\",\n \"EA AtomicReq\": \"&b20_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b20_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b20_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b20_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b20_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b20_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b20_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b20_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b20_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b20_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b20_ea_write_stall_too_many\"\n\n 
},\n {\n \"Channel\": \"21\",\n \"Hit Rate\": \"&b21_hitRate\",\n \"Req\": \"&b21_req\",\n \"Read Req\": \"&b21_readReq\",\n \"Write Req\": \"&b21_writeReq\",\n \"AtomicReq\": \"&b21_atomicReq\",\n \"EA Read Req\": \"&b21_eaReadReq\",\n \"EA Write Req\": \"&b21_eaWriteReq\",\n \"EA AtomicReq\": \"&b21_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b21_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b21_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b21_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b21_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b21_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b21_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b21_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b21_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b21_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b21_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"22\",\n \"Hit Rate\": \"&b22_hitRate\",\n \"Req\": \"&b22_req\",\n \"Read Req\": \"&b22_readReq\",\n \"Write Req\": \"&b22_writeReq\",\n \"AtomicReq\": \"&b22_atomicReq\",\n \"EA Read Req\": \"&b22_eaReadReq\",\n \"EA Write Req\": \"&b22_eaWriteReq\",\n \"EA AtomicReq\": \"&b22_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b22_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b22_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b22_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b22_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b22_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b22_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b22_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b22_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b22_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b22_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"23\",\n \"Hit Rate\": \"&b23_hitRate\",\n \"Req\": \"&b23_req\",\n \"Read Req\": \"&b23_readReq\",\n \"Write Req\": \"&b23_writeReq\",\n 
\"AtomicReq\": \"&b23_atomicReq\",\n \"EA Read Req\": \"&b23_eaReadReq\",\n \"EA Write Req\": \"&b23_eaWriteReq\",\n \"EA AtomicReq\": \"&b23_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b23_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b23_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b23_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b23_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b23_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b23_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b23_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b23_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b23_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b23_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"24\",\n \"Hit Rate\": \"&b24_hitRate\",\n \"Req\": \"&b24_req\",\n \"Read Req\": \"&b24_readReq\",\n \"Write Req\": \"&b24_writeReq\",\n \"AtomicReq\": \"&b24_atomicReq\",\n \"EA Read Req\": \"&b24_eaReadReq\",\n \"EA Write Req\": \"&b24_eaWriteReq\",\n \"EA AtomicReq\": \"&b24_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b24_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b24_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b24_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b24_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b24_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b24_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b24_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b24_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b24_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b24_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"25\",\n \"Hit Rate\": \"&b25_hitRate\",\n \"Req\": \"&b25_req\",\n \"Read Req\": \"&b25_readReq\",\n \"Write Req\": \"&b25_writeReq\",\n \"AtomicReq\": \"&b25_atomicReq\",\n \"EA Read Req\": \"&b25_eaReadReq\",\n \"EA Write Req\": \"&b25_eaWriteReq\",\n \"EA AtomicReq\": \"&b25_eaAtomicReq\",\n \"EA Read 
Lat - cycles\": \"&b25_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b25_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b25_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b25_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b25_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b25_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b25_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b25_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b25_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b25_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"26\",\n \"Hit Rate\": \"&b26_hitRate\",\n \"Req\": \"&b26_req\",\n \"Read Req\": \"&b26_readReq\",\n \"Write Req\": \"&b26_writeReq\",\n \"AtomicReq\": \"&b26_atomicReq\",\n \"EA Read Req\": \"&b26_eaReadReq\",\n \"EA Write Req\": \"&b26_eaWriteReq\",\n \"EA AtomicReq\": \"&b26_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b26_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b26_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b26_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b26_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b26_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b26_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b26_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b26_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b26_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b26_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"27\",\n \"Hit Rate\": \"&b27_hitRate\",\n \"Req\": \"&b27_req\",\n \"Read Req\": \"&b27_readReq\",\n \"Write Req\": \"&b27_writeReq\",\n \"AtomicReq\": \"&b27_atomicReq\",\n \"EA Read Req\": \"&b27_eaReadReq\",\n \"EA Write Req\": \"&b27_eaWriteReq\",\n \"EA AtomicReq\": \"&b27_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b27_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b27_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b27_eaAtomicLat\",\n \"EA Read Stall - IO\": 
\"&b27_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b27_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b27_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b27_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b27_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b27_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b27_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"28\",\n \"Hit Rate\": \"&b28_hitRate\",\n \"Req\": \"&b28_req\",\n \"Read Req\": \"&b28_readReq\",\n \"Write Req\": \"&b28_writeReq\",\n \"AtomicReq\": \"&b28_atomicReq\",\n \"EA Read Req\": \"&b28_eaReadReq\",\n \"EA Write Req\": \"&b28_eaWriteReq\",\n \"EA AtomicReq\": \"&b28_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b28_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b28_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b28_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b28_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b28_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b28_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b28_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b28_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b28_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b28_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"29\",\n \"Hit Rate\": \"&b29_hitRate\",\n \"Req\": \"&b29_req\",\n \"Read Req\": \"&b29_readReq\",\n \"Write Req\": \"&b29_writeReq\",\n \"AtomicReq\": \"&b29_atomicReq\",\n \"EA Read Req\": \"&b29_eaReadReq\",\n \"EA Write Req\": \"&b29_eaWriteReq\",\n \"EA AtomicReq\": \"&b29_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b29_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b29_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b29_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b29_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b29_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b29_ea_read_stall_dram_credit\",\n \"EA 
Write Stall - IO\": \"&b29_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b29_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b29_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b29_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"30\",\n \"Hit Rate\": \"&b30_hitRate\",\n \"Req\": \"&b30_req\",\n \"Read Req\": \"&b30_readReq\",\n \"Write Req\": \"&b30_writeReq\",\n \"AtomicReq\": \"&b30_atomicReq\",\n \"EA Read Req\": \"&b30_eaReadReq\",\n \"EA Write Req\": \"&b30_eaWriteReq\",\n \"EA AtomicReq\": \"&b30_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b30_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b30_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b30_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b30_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b30_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b30_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b30_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b30_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b30_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b30_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"31\",\n \"Hit Rate\": \"&b31_hitRate\",\n \"Req\": \"&b31_req\",\n \"Read Req\": \"&b31_readReq\",\n \"Write Req\": \"&b31_writeReq\",\n \"AtomicReq\": \"&b31_atomicReq\",\n \"EA Read Req\": \"&b31_eaReadReq\",\n \"EA Write Req\": \"&b31_eaWriteReq\",\n \"EA AtomicReq\": \"&b31_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b31_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b31_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b31_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b31_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b31_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b31_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b31_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b31_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": 
\"&b31_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b31_ea_write_stall_too_many\"\n\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 16-31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + 
"RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 81 + }, + "id": 93, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + 
"HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 81 + }, + "id": 94, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 
70, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": 
false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 89 + }, + "id": 187, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 
0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 89 + }, + "id": 201, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + 
"BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + 
"ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 97 + }, + "id": 220, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + 
"LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 97 + }, + "id": 227, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 
61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + 
"ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 221, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + 
"MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 105 + }, + "id": 228, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + 
"GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + 
"Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 222, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + 
"OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 113 + }, + "id": 229, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": 
"#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 121 + }, 
+ "id": 223, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + 
"ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 121 + }, + "id": 230, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + 
"LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 129 + }, + "id": 225, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 
0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + 
"ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 129 + }, + "id": 231, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + 
"LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 137 + }, + "id": 224, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": 
"Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + 
"ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 137 + }, + "id": 232, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + 
"LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 145 + }, + "id": 226, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": 
false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": 
"top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 145 + }, + "id": 233, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-rocprofiler-compute-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache (per Channel)", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 34, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + "hide": 0, + "includeAll": false, + "label": "Normalization", + "multi": false, + "name": "normUnit", + "options": [ + { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + { + "selected": false, + "text": "\"per Cycle\"", + "value": "\"per Cycle\"" + }, + { + "selected": false, + "text": "\"per Sec\"", + "value": "\"per Sec\"" + }, + { + "selected": false, + "text": "\"per Kernel\"", + "value": "\"per Kernel\"" + } + ], + "query": "\"per Wave\",\n\"per Cycle\",\n\"per Sec\",\n\"per Kernel\"", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "L2 Channels", + "multi": false, + "name": "L2Banks", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 
1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SEs", + "multi": false, + "name": "numSE", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "104", + "value": "104" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#CUs", + "multi": false, + "name": "numCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SCLK (MHz)", + "multi": false, + "name": "sclk", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + 
{ + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SQC", + "multi": false, + "name": "numSQC", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "HBM BW (GB/s)", + "multi": false, + "name": "hbmBW", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "rocprofiler-compute_asw_mixbench_mi200", + "value": "rocprofiler-compute_asw_mixbench_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Workload", + "multi": false, + "name": "Workload1", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "435369", + "value": "435369" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n 
{\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Kernel Cycles", + "multi": false, + "name": "kernelBusyCycles", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "103", + "value": "103" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Active 
CUs", + "multi": false, + "name": "numActiveCUs", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Filtered Dispatch ID", + "multi": false, + "name": "DispatchIDFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired 
DispatchID filters as regex ex. (1|18)", + "hide": 0, + "label": "Dispatch Filter", + "name": "DispatchID", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "GCD", + "multi": false, + "name": "gpuFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Kernels", + "multi": true, + "name": "KernelNameFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "rocprofiler-compute_asw_mixbench_mi200", + "value": "rocprofiler-compute_asw_mixbench_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline Workload", + "multi": false, + "name": "Workload2", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "103", + "value": "103" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n 
\"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline ActiveCUs", + "multi": false, + "name": "numActiveCUs2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": 
[${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Baseline Dispatch IDs", + "multi": false, + "name": "DispatchIDFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID2:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. (1|18)", + "hide": 0, + "label": "Baseline Dispatch Filter", + "name": "DispatchID2", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline GCD", + "multi": false, + "name": "gpuFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Baseline Kernels", + "multi": true, + "name": "KernelNameFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + 
"refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "System Info" + ], + "value": [ + "System Info" + ] + }, + "hide": 0, + "includeAll": false, + "label": "Comparison Panels", + "multi": true, + "name": "select", + "options": [ + { + "selected": true, + "text": "System Info", + "value": "System Info" + }, + { + "selected": false, + "text": "System Speed-of-Light", + "value": "System Speed-of-Light" + }, + { + "selected": false, + "text": "Roofline", + "value": "Roofline" + }, + { + "selected": false, + "text": "Command Processor", + "value": "Command Processor" + }, + { + "selected": false, + "text": "Shader Processor Input", + "value": "Shader Processor Input" + }, + { + "selected": false, + "text": "Wavefront", + "value": "Wavefront" + }, + { + "selected": false, + "text": "Compute Pipeline", + "value": "Compute Pipeline" + }, + { + "selected": false, + "text": "Instruction Mix", + "value": "Instruction Mix" + }, + { + "selected": false, + "text": "Local Data Share", + "value": "Local Data Share" + }, + { + "selected": false, + "text": "Instruction Cache", + "value": "Instruction Cache" + }, + { + "selected": false, + "text": "Scalar L1D Cache", + "value": "Scalar L1D Cache" + }, + { + "selected": false, + "text": "Texture Addr and Data", + "value": "Texture Addr and Data" + }, + { + "selected": false, + "text": "Vector L1D Cache", + "value": "Vector L1D Cache" + }, + { + "selected": false, + "text": "L2 Cache", + "value": "L2 Cache" + } + ], + "query": "System Info, \nSystem Speed-of-Light, \nRoofline,\nCommand Processor, \nShader Processor Input, \nWavefront,\nCompute Pipeline, \nInstruction Mix,\nLocal Data Share, \nInstruction Cache, \nScalar L1D Cache, \nTexture Addr and Data, \nVector L1D Cache,\nL2 Cache", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": 
"$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline L2 Channels", + "multi": false, + "name": "L2Banks2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SEs", + "multi": false, + "name": "numSE2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "104", + "value": "104" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #CUs", + "multi": false, + "name": "numCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": 
"$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline SCLK (MHz)", + "multi": false, + "name": "sclk2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SQC", + "multi": false, + "name": "numSQC2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline HBM BW (GB/s)", + "multi": false, + "name": "hbmBW2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "mi200", + "value": "mi200" + }, + "definition": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SOC", + "multi": false, + "name": "soc", + "options": [], + "query": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "5", + "value": "5" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "TopN", + 
"options": [ + { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "20", + "value": "20" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "query": "1,5,10,15,20,50,100", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2021-11-04T14:21:39.749Z", + "to": "2021-11-08T14:21:39.749Z" + }, + "timepicker": {}, + "timezone": "", + "title": "rocprofiler-compute_v1.0.3_pub", + "uid": "rocprofiler-compute_v1_0_063020221", + "version": 4, + "weekStart": "" +} diff --git a/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.5_pub.json b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.5_pub.json new file mode 100644 index 0000000000..9e32666d7a --- /dev/null +++ b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.5_pub.json @@ -0,0 +1,13331 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 41, + "iteration": 1670355676329, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 217, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": 
"auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 23, + "w": 13, + "x": 0, + "y": 1 + }, + "id": 159, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.sysinfo.aggregate([\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max 
Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.sysinfo.aggregate([\n {\"$match\": {\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(System Info)\"}}\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n 
\"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "System Info", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true + }, + "indexByName": {}, + "renameByName": { + "Value 1": "Current", + "Value 2": "Baseline" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Info", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 108, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", 
+ "filterable": false + }, + "decimals": 0, + "links": [], + "mappings": [ + { + "options": { + "match": "false", + "result": { + "index": 0 + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text" + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Percent of Peak - PoP" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.width", + "value": 252 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit 1" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 137 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 125 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 161 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 15, + "x": 0, + "y": 2 + }, + "id": 110, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + 
"uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { 
\"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n },\n\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", 
\"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": 
[\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n 
\"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs,\n \"Unit\": \"CUs\",\n \"peak\": $numCU,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n 
\"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n 
},\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n },\n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n 
}}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }}\n\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, 
\"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2, 4] }] }\n },\n \n 
\"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ 
\"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] 
},\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n 
\"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs2,\n \"Unit\": \"CUs\",\n \"peak\": $numCU2,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs2] }, $numCU2]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk2, $numCU2, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache 
BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n }, \n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": 
\"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU2, $numCU2] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU2, $numCU2] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n 
\"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC2, { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]\n }}\n\n ]);", + "type": "table" + } + ], + "title": "Speed of Light", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Metric 1": 0, + "Metric 2": 7, + "Percent of Peak - PoP 1": 5, + "Percent of Peak - PoP 2": 6, + "Unit 1": 8, + "Unit 2": 9, + "Value 1": 1, + "Value 2": 2, + "peak 1": 3, + "peak 2": 4 + }, + "renameByName": { + "Percent of Peak - PoP": "Pct-of-Peak", + "Percent of Peak - PoP 1": "Pct-of-Peak (Current)", + "Percent of Peak - PoP 2": "Pct-of-Peak (Baseline)", + "Unit": "", + "Value": "Avg", + "Value 1": "Avg (Current)", + "Value 2": "Avg (Baseline)", + "peak": "Theoretical Max", + "peak 1": "Theoretical Max (Current)", + "peak 2": "Theoretical Max (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": 
[], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 175, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n],\n{ allowDiskUse: true }\n);", + "type": "table" + } + ], + "title": "Dispatch IDs - Current", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 215, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Dispatch IDs - Baseline", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Speed-of-Light", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 36, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 157, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + } + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "u5Z2zJhnk" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"elapsedTime1\": {\n 
\"$divide\": [{\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}, 1000]\n }\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"elapsedTime1\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Kernel Time Histogram", + "transparent": true, + "type": "histogram" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 123 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + 
"value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Performance" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Peak FLOPs" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + 
] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 213, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "L1 Cache (Bytes)" + } + ] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n 
\"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128 ]} \n },\n\n \"L1cache_data\": {\n 
\"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n \n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n }}\n]);", + "type": "table" + } + ], + "title": "Top Kernels", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 
12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Name", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "peak_flops": "Peak FLOPs", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS " + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 87 + } + ] 
+ }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 153 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS (Bytes)" + }, + "properties": [ + { + "id": "custom.width", + "value": 98 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + }, + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dispatch" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 251, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&Index\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, 
\"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": 
{ \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": 
{ \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n\n }}\n]);", + "type": "table" + } + ], + "title": "Top Dispatches", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "peak_flops": 19, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS ", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Dispatch", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Kernel Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 40, + "panels": [ + { + "description": "All transaction units default to Billion, when per-sec norm is used", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } 
+ }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 285, + "options": { + "addAllIDs": false, + "captureMappings": false, + "eventAutoComplete": true, + "eventSource": "options.animateLogo(svgmap, data);\r\nconsole.log(\"Starting render\");\r\nlet buff = data.series[0].fields[2].values.buffer;\r\nlet valueCount = buff.length;\r\nconsole.log(\"The buff is \", valueCount, \" long\");\r\n\r\nsvgmap.wave_life_.text(buff[0]);\r\nsvgmap.active_cu_.text(buff[1]);\r\nsvgmap.salu_.text(buff[2]);\r\nsvgmap.smem_.text(buff[3]);\r\nsvgmap.valu_.text(buff[4]);\r\nsvgmap.mfma_.text(buff[5]);\r\nsvgmap.vmem_.text(buff[6]);\r\nsvgmap.lds_.text(buff[7]);\r\nsvgmap.gws_.text(buff[8]);\r\nsvgmap.br_.text(buff[9]);\r\nsvgmap.vgpr_.text(buff[10]);\r\nsvgmap.sgpr_.text(buff[11]);\r\nsvgmap.lds_alloc_.text(buff[12]);\r\nsvgmap.scratch_alloc_.text(buff[13]);\r\nsvgmap.wavefronts_.text(buff[14]);\r\nsvgmap.workgroups_.text(buff[15]);\r\nsvgmap.lds_req_.text(buff[16]);\r\nsvgmap.il1_fetch_.text(buff[17]);\r\nsvgmap.il1_hit_.text(buff[18]);\r\nsvgmap.il1_l2_rd_.text(buff[19]);\r\nsvgmap.sl1_rd_.text(buff[20]);\r\nsvgmap.sl1_hit_.text(buff[21]);\r\nsvgmap.sl1_l2_rd_.text(buff[22]);\r\nsvgmap.sl1_l2_wr_.text(buff[23]);\r\nsvgmap.sl1_l2_atom_.text(buff[24]);\r\nsvgmap.vl1_rd_.text(buff[25]);\r\nsvgmap.vl1_wr_.text(buff[26]);\r\nsvgmap.vl1_atom_.text(buff[27]);\r\nsvgmap.vl1_hit_.text(buff[28]);\r\nsvgmap.vl1_lat_.text(buff[29]);\r\nsvgmap.vl1_l2_rd_.text(buff[30]);\r\nsvgmap.vl1_l2_wr_.text(buff[31]);\r\nsvgmap.vl1_l2_atom_.text(buff[32]);\r\nsvgmap.l2_rd_.text(buff[33]);\r\nsvgmap.l2_wr_.text(buff[34])\r\nsvgmap.l2_atom_.text(buff[35]);\r\nsvgmap.l2_hit_.text(buff[36]);\r\nsvgmap.l2_rd_lat_.text(buff[37]);\r\nsvgmap.l2_wr_lat_.text(buff[38]);\r\nsvgmap.fabric_rd_lat_.text(buff[39]);\r\nsvgmap.fabric_wr_lat_.text(buff[40]);\r\nsvgmap.fabric_atom_lat_.text(buff[41]);\r\nsvgmap.l2_fabric_rd_.text(buff[42]);\r\nsvgmap.l2_fabric_wr_.text(buff[43]);\r\nsvg
map.l2_fabric_atom_.text(buff[44]);\r\nsvgmap.hbm_rd_.text(buff[45]);\r\nsvgmap.hbm_wr_.text(buff[46]);\r\nsvgmap.lds_util_.text(buff[47]);\r\nsvgmap.vl1_coales_.text(buff[48]);\r\nsvgmap.vl1_stall_.text(buff[49]);\r\nsvgmap.wave_occ_.text(buff[50]);\r\nsvgmap.lds_lat_.text(buff[51]);\r\nsvgmap.il1_lat_.text(buff[52]);\r\nsvgmap.sl1_lat_.text(buff[53]);\r\nsvgmap.gds_req_.text(buff[54]);", + "initAutoComplete": true, + "initSource": "options.animateLogo = (svgmap, data) => {\r\n \r\n}\r\n ", + "svgMappings": [ + { + "mappedName": "wave_life_", + "svgId": "wave_life" + }, + { + "mappedName": "wave_occ_", + "svgId": "wave_occ" + }, + { + "mappedName": "salu_", + "svgId": "salu" + }, + { + "mappedName": "smem_", + "svgId": "smem" + }, + { + "mappedName": "valu_", + "svgId": "valu" + }, + { + "mappedName": "mfma_", + "svgId": "mfma" + }, + { + "mappedName": "vmem_", + "svgId": "vmem" + }, + { + "mappedName": "lds_", + "svgId": "lds" + }, + { + "mappedName": "gws_", + "svgId": "gws" + }, + { + "mappedName": "br_", + "svgId": "br" + }, + { + "mappedName": "active_cu_", + "svgId": "active_cu" + }, + { + "mappedName": "vgpr_", + "svgId": "vgpr" + }, + { + "mappedName": "sgpr_", + "svgId": "sgpr" + }, + { + "mappedName": "lds_alloc_", + "svgId": "lds_alloc" + }, + { + "mappedName": "scratch_alloc_", + "svgId": "scratch_alloc" + }, + { + "mappedName": "wavefronts_", + "svgId": "wavefronts" + }, + { + "mappedName": "workgroups_", + "svgId": "workgroups" + }, + { + "mappedName": "lds_req_", + "svgId": "lds_req" + }, + { + "mappedName": "vl1_wr_", + "svgId": "vl1_wr" + }, + { + "mappedName": "vl1_atom_", + "svgId": "vl1_atom" + }, + { + "mappedName": "sl1_rd_", + "svgId": "sl1_rd" + }, + { + "mappedName": "il1_fetch_", + "svgId": "il1_fetch" + }, + { + "mappedName": "lds_lat_", + "svgId": "lds_lat" + }, + { + "mappedName": "lds_bw_", + "svgId": "lds_bw" + }, + { + "mappedName": "lds_util_", + "svgId": "lds_util" + }, + { + "mappedName": "vl1_hit_", + "svgId": "vl1_hit" + }, + { 
+ "mappedName": "vl1_lat_", + "svgId": "vl1_lat" + }, + { + "mappedName": "vl1_coales_", + "svgId": "vl1_coales" + }, + { + "mappedName": "vl1_stall_", + "svgId": "vl1_stall" + }, + { + "mappedName": "sl1_hit_", + "svgId": "sl1_hit" + }, + { + "mappedName": "sl1_lat_", + "svgId": "sl1_lat" + }, + { + "mappedName": "il1_hit_", + "svgId": "il1_hit" + }, + { + "mappedName": "il1_lat_", + "svgId": "il1_lat" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "il1_l2_rd_", + "svgId": "il1_l2_rd" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "l2_rd_", + "svgId": "l2_rd" + }, + { + "mappedName": "l2_wr_", + "svgId": "l2_wr" + }, + { + "mappedName": "l2_atom_", + "svgId": "l2_atom" + }, + { + "mappedName": "l2_hit_", + "svgId": "l2_hit" + }, + { + "mappedName": "l2_rd_lat_", + "svgId": "l2_rd_lat" + }, + { + "mappedName": "l2_wr_lat_", + "svgId": "l2_wr_lat" + }, + { + "mappedName": "l2_fabric_rd_", + "svgId": "l2_fabric_rd" + }, + { + "mappedName": "l2_fabric_wr_", + "svgId": "l2_fabric_wr" + }, + { + "mappedName": "l2_fabric_atom_", + "svgId": "l2_fabric_atom" + }, + { + "mappedName": "fabric_rd_lat_", + "svgId": "fabric_rd_lat" + }, + { + "mappedName": "fabric_wr_lat_", + "svgId": "fabric_wr_lat" + }, + { + "mappedName": "fabric_atom_lat_", + "svgId": "fabric_atom_lat" + }, + { + "mappedName": "fabric_hbm_rd_", + "svgId": "fabric_hbm_rd" + }, + { + "mappedName": "fabric_hbm_wr_", + "svgId": "fabric_hbm_wr" + }, + { + "mappedName": "vl1_rd_", + "svgId": "vl1_rd" + }, + { + "mappedName": "vl1_l2_rd_", + "svgId": "vl1_l2_rd" + }, + { + "mappedName": "vl1_l2_wr_", + "svgId": "vl1_l2_wr" + }, + { + "mappedName": "vl1_l2_atom_", + "svgId": 
"vl1_l2_atom" + }, + { + "mappedName": "hbm_rd_", + "svgId": "hbm_rd" + }, + { + "mappedName": "hbm_wr_", + "svgId": "hbm_wr" + } + ], + "svgSource": "\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n Wave Occupancy\r\n \r\n Wave Life\r\n \r\n \r\n \r\n xGMI /\r\n PCIe\r\n \r\n GMI\r\n \r\n HBM\r\n \r\n Fabric\r\n \r\n \r\n SALU:\r\n 00000\r\n \r\n \r\n SMEM:\r\n 00000\r\n \r\n \r\n VALU:\r\n 00000\r\n \r\n \r\n MFMA:\r\n 00000\r\n \r\n \r\n VMEM:\r\n 00000\r\n \r\n \r\n LDS:\r\n 00000\r\n \r\n \r\n GWS:\r\n 00000\r\n \r\n \r\n Br:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n \r\n \r\n cycles\r\n Atomic:\r\n 00000\r\n \r\n \r\n Rd:\r\n 00000\r\n \r\n \r\n Wr:\r\n \r\n \r\n 00000\r\n \r\n \r\n Atomic:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n 00000\r\n Rd:\r\n 00000\r\n Wr:\r\n 00000\r\n Req:\r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n per-GCD\r\n cycles\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n Wave 0 Instr buff\r\n Wave N-1 Instr buff\r\n Active CUs\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Util:\r\n 00000\r\n \r\n \r\n %\r\n Coales:\r\n 00000\r\n Exec\r\n Instr Buff\r\n Instr Dispatch\r\n LDS\r\n Vector L1 Cache\r\n Scalar L1D Cache\r\n Instr L1 Cache\r\n L2 Cache\r\n 00000\r\n Req:\r\n \r\n \r\n %\r\n Stall:\r\n 00000\r\n 00000\r\n Fetch:\r\n 0000000\r\n 00000\r\n 000/000\r\n \r\n Latency\r\n \r\n LDS Alloc:\r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n \r\n Scratch Alloc:\r\n \r\n 00000\r\n \r\n Wavefronts:\r\n \r\n 00000\r\n \r\n Workgroups:\r\n \r\n 00000\r\n \r\n VGPRs:\r\n \r\n 00000\r\n \r\n SGPRs:\r\n \r\n 00000\r\n \r\n \r\n 00000\r\n Rd:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n Latency\r\n \r\n \r\n \r\n \r\n Text is not SVG - cannot display\r\n \r\n \r\n" + }, + "pluginVersion": "8.4.0", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n 
\"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n },\n \"br\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&vgpr\"\n },\n \"sgpr\": {\n \"$avg\": \"&sgpr\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&lds\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&scr\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n 
\"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": 
[\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$numCU\"]}\n 
},\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": \"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n \"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": 
\"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": \"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { 
\"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n \"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_LEVEL_WAVES", + "target": "$Workload1.SQ_LEVEL_WAVES.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_occ\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\",\"&GRBM_GUI_ACTIVE\"] }, $numActiveCUs]}\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Alias\": \"wave_occ_\",\n \"Value\":{ \"$round\": [\"&wave_occ\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "$Workload1.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"lds_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&SQ_INSTS_LDS\", 0] },\n { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\"] },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"LDS Lat\",\n \"Alias\": \"lds_lat_\",\n \"Value\":{ \"$round\": [\"&lds_lat\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_ICACHE_INFLIGHT", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": 
[${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"il1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_ICACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_ICACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"IL1 Lat\",\n \t\t\t\"Alias\": \"il1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&il1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_DCACHE_INFLIGHT_LEVEL", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"sl1_lat\": 
{\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_DCACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_DCACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"vL1D Lat\",\n \t\t\t\"Alias\": \"sl1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&sl1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + } + ], + "title": "Memory Chart (Normalization: $normUnit)", + "transformations": [ + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "string", + "targetField": "Value" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "amd-custom-svg" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Memory Chart Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 241, + "panels": [ + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 253, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + 
"loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm" + }, + "name": "HBM-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2" + }, + "name": "L2-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1" + }, + "name": "vL1D-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS" + }, + "name": "LDS-VALU", + "settings": { + "color_option": 
"ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", 
+ "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA" + }, + "name": "HBM-MFMA", + 
"settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA" + }, + "name": "L2-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA" + }, + "name": "vL1D-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA" + }, + "name": "LDS-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + 
"size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "rawQuery": true, + "refId": "HBM-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n 
\"roofline_hbm_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { 
\"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"&high_flop\"\n }\n },\n\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { 
\"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { 
\"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } 
},\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + 
"rawQuery": true, + "refId": "L2-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP32/FP64 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + }, + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 312, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + 
"color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + 
"colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_fp16" + }, + "name": "HBM-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + 
"markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_fp16" + }, + "name": "L2-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_fp16" + }, + "name": "vL1D-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_fp16" + }, + "name": "LDS-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_i8" + }, + "name": "HBM-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", 
+ "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_i8" + }, + "name": "L2-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_i8" + }, + "name": "vL1D-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_i8" + }, + "name": "LDS-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + 
"sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n 
{\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { 
\"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", 
\"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n 
\"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": 
[0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n 
{\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n 
}}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP16/INT8 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Roofline Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 2, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 13, + "x": 0, + "y": 62 + }, + "id": 6, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n 
},\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n 
}},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": 
\"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { 
\"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, 
\"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n 
\"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Fetcher", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min 
(Baseline)" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 171 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 147 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 11, + "x": 13, + "y": 62 + }, + "id": 4, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n 
\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": 
[\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, 
\"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n 
\"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n 
{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, 
\"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ 
\"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC 
Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Compute", + "transformations": [ + { + "id": 
"concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Metric 1": "", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Command Processor (CPC/CPF)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 102, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 101 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 145 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 97 + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "Min 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 123 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 106, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n 
\"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n 
\"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n 
\"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max 
(Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 285 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 242 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 104, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + 
"rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n 
\"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n 
\"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n 
\"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n 
\"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n 
\"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n 
\"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": 
\"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Resource Allocation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Shader Processor Input (SPI)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 185, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 142 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 196 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 174 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.width", + "value": 168 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min" + }, + "properties": [ + { + "id": "custom.width", + "value": 272 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 225 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 64 + }, + "id": 10, + "interval": "12h", + "options": { + 
"footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": 
\"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": 
\"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored 
Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Wavefront Launch Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true, + "Units 2": true, + "metric 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" 
+ }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 223 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 64 + }, + "id": 34, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { 
\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": 
\"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": 
"${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" 
]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n 
\"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "Wavefront Runtime Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg": "", + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "", + "Unit 2": "" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Wavefront", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 209, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, 
+ "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 12, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }\n },\n 
{\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n \n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n 
\"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector (Baseline)\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Instruction Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 24, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 24, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "limit": 100, + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + 
"type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", 
\"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": 
{\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n 
{\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA (Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL (Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VALU Arithmetic Instr Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 26 + }, 
+ "id": 275, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": 
\"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ 
\"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VMEM Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "VMEM Instr", + "type 1": "VMEM Instr" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + 
"id": 16, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"mmfa_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&SQ_WAVES\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mmfa_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mmfa_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mmfa_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n 
{\"$group\": {\n \"_id\": null,\n \"mmfa_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&SQ_WAVES\" ] }\n },\n \"mmfa_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&SQ_WAVES\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&SQ_WAVES\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mmfa_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mmfa_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mmfa_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "MFMA Arithmetic Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "MFMA Instr", + "type 1": "MFMA Instr" + } + } + } + ], + "transparent": true, + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Instruction Mix", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 8, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 
1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 211, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": 
[\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] 
},\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n\n \"instr_val\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": 
[{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Compute Pipeline", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "mfma_flops_bf16_pop 1": 4, + "mfma_flops_bf16_pop 2": 5, + "mfma_flops_f16_pop 1": 6, + "mfma_flops_f16_pop 2": 7, + "mfma_flops_f32_pop 1": 8, + "mfma_flops_f32_pop 2": 9, + "mfma_flops_f64_pop 1": 10, + "mfma_flops_f64_pop 2": 11, + "mfma_flops_i8_pop 1": 12, + "mfma_flops_i8_pop 2": 13, + "valu_flops_pop 1": 0, + "valu_flops_pop 2": 1, + "valu_iops_pop 1": 2, + "valu_iops_pop 2": 3 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict 
Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "flops_pop": "FLOPs", + "flops_pop 1": "FLOPs (Current)", + "flops_pop 2": "FLOPs (Baseline)", + "iops_pop": "IOPs", + "iops_pop 1": "IOPs (Current)", + "iops_pop 2": "IOPs (Baseline)", + "mfma_flops_bf16_pop": "MFMA- BF16 (FLOPs)", + "mfma_flops_bf16_pop 1": "MFMA-BF16 (Cur)", + "mfma_flops_bf16_pop 2": "MFMA-BF16 (Baseline)", + "mfma_flops_f16_pop": "MFMA-F16 (FLOPs)", + "mfma_flops_f16_pop 1": "MFMA-F16 (Cur)", + "mfma_flops_f16_pop 2": "MFMA-F16 (Baseline)", + "mfma_flops_f32_pop": "MFMA-F32 (FLOPs)", + "mfma_flops_f32_pop 1": "MFMA-F32 (Cur)", + "mfma_flops_f32_pop 2": "MFMA-F32 (Baseline)", + "mfma_flops_f64_pop": "MFMA-F64 (FLOPs)", + "mfma_flops_f64_pop 1": "MFMA-F64 (Cur)", + "mfma_flops_f64_pop 2": "MFMA-F64 (Baseline)", + "mfma_flops_i8_pop": "MFMA-i8 (IOPs)", + "mfma_flops_i8_pop 1": "MFMA-I8 (Cur)", + "mfma_flops_i8_pop 2": "MFMA-I8 (Baseline)", + "valu_flops_pop": "VALU (FLOPs)", + "valu_flops_pop 1": "VALU FLOPs (Cur)", + "valu_flops_pop 2": "VALU FLOPs (Baseline)", + "valu_iops_pop": "VALU (IOPs)", + "valu_iops_pop 1": "VALU IOPs (Cur)", + "valu_iops_pop 2": "VALU IOPs (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 257, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + 
], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, 
\"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] 
},\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg\": \"&avg_ipcAvg\",\n \"Min\": \"&min_ipcAvg\",\n \"Max\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg\": \"&avg_ipcIssue\",\n \"Min\": \"&min_ipcIssue\",\n \"Max\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg\": \"&avg_saluUtil\",\n \"Min\": \"&min_saluUtil\",\n \"Max\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg\": \"&avg_valuUtil\",\n \"Min\": \"&min_valuUtil\",\n \"Max\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg\": \"&avg_unpredthreads_val\",\n \"Min\": \"&min_unpredthreads_val\",\n \"Max\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg\": \"&avg_mfmaUtil\",\n \"Min\": \"&min_mfmaUtil\",\n \"Max\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg\": \"&avg_mfmaInstrCycles\",\n \"Min\": \"&min_mfmaInstrCycles\",\n \"Max\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, 
\"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", 
\"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg 2\": \"&avg_ipcAvg\",\n \"Min 2\": \"&min_ipcAvg\",\n \"Max 2\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg 2\": \"&avg_ipcIssue\",\n \"Min 2\": \"&min_ipcIssue\",\n \"Max 2\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg 2\": \"&avg_saluUtil\",\n \"Min 2\": \"&min_saluUtil\",\n \"Max 2\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg 2\": \"&avg_valuUtil\",\n \"Min 2\": \"&min_valuUtil\",\n \"Max 2\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg 2\": \"&avg_unpredthreads_val\",\n \"Min 2\": \"&min_unpredthreads_val\",\n \"Max 2\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg 2\": \"&avg_mfmaUtil\",\n \"Min 2\": \"&min_mfmaUtil\",\n \"Max 2\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg 2\": \"&avg_mfmaInstrCycles\",\n \"Min 2\": \"&min_mfmaInstrCycles\",\n \"Max 2\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n 
\"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Pipeline Stats", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg 2": "Avg (Baseline)", + "Max 2": "Max (Baseline)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 96, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + 
"type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", 
{\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", 
\"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ 
\n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n 
\"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + 
"target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": 
[\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", 
{\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ 
\"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n 
\"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Arithmetic Operations", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + 
"options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 255, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM", + "target": "${Workload1}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n 
}\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg\": \"&avg_vmemLat\",\n \"Min\": \"&min_vmemLat\",\n \"Max\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM", + "target": "${Workload1}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg\":\"&avg_smemLat\",\n \"Min\":\"&min_smemLat\",\n \"Max\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL", + "target": "${Workload1}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n 
\"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg\":\"&avg_instrFetchLat\",\n \"Min\":\"&min_instrFetchLat\",\n \"Max\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "${Workload1}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg\":\"&avg_ldsLat\",\n \"Min\":\"&min_ldsLat\",\n \"Max\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg 2\": \"&avg_vmemLat\",\n \"Min 2\": \"&min_vmemLat\",\n \"Max 2\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ 
\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg 2\":\"&avg_smemLat\",\n \"Min 2\":\"&min_smemLat\",\n \"Max 2\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL2", + "target": "${Workload2}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg 2\":\"&avg_instrFetchLat\",\n \"Min 2\":\"&min_instrFetchLat\",\n \"Max 2\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS2", + "target": "${Workload2}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": 
{\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg 2\":\"&avg_ldsLat\",\n \"Min 2\":\"&min_ldsLat\",\n \"Max 2\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + } + ], + "title": "Memory Latencies", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Compute Pipeline", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 98, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": 
"#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 205, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith 
(Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n }\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n 
}}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: LDS", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Access Rate 1": 6, + "Access Rate 2": 7, + "Bandwith (Pct-of-Peak) 1": 0, + "Bandwith (Pct-of-Peak) 2": 1, + "Bank Conflict Rate 1": 2, + "Bank Conflict Rate 2": 3, + "Utilization 1": 4, + "Utilization 2": 5 + }, + "renameByName": { + "Access Rate 1": "Access Rate (Current)", + "Access Rate 2": "Access Rate (Baseline)", + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "Utilization 1": "Util (Current)", + "Utilization 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "min": -100000000000000000000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + 
"properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 141 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n 
\"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { 
\"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n 
\"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflicts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": 
[${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { 
\"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , 
\"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n 
\"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflicts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { 
\"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "LDS Stats", + "transformations": [ + { + "id": "concatenate", + "options": { + "frameNameLabel": "frame", + "frameNameMode": "field" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Local Data Share (LDS)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 44, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 48, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\" ] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": 
true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Instruction Cache ", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW (Pct-of-Peak) 1": 4, + "BW (Pct-of-Peak) 2": 5, + "Cache Hit 1": 6, + "Cache Hit 2": 7, + "Stall 1": 2, + "Stall 2": 3, + "Util 1": 0, + "Util 2": 1 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + 
"datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 259, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": 
[\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&avg_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&avg_hits\",\n \"Min\": \"&min_hits\",\n \"Max\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean\": \"&avg_misses\",\n \"Min\": \"&min_misses\",\n \"Max\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean\": \"&avg_misses_dup\",\n \"Min\": \"&min_misses_dup\",\n \"Max\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n \n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&avg_cacheHit\",\n \"Min\": \"&min_cacheHit\",\n \"Max\": 
\"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": 
[\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&avg_req\",\n \"Min 2\": \"&min_req\",\n \"Max 2\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&avg_hits\",\n \"Min 2\": \"&min_hits\",\n \"Max 2\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&avg_misses\",\n \"Min 2\" : \"&min_misses\",\n \"Max 2\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean 2\": \"&avg_misses_dup\",\n \"Min 2\": \"&min_misses_dup\",\n \"Max 2\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&avg_cacheHit\",\n \"Min 2\": \"&min_cacheHit\",\n \"Max 2\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Instruction Cache Accesses", + "transformations": 
[ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "L1I Metric": "", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Instruction Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 203, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L1K-TC BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 54, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { 
\"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "sY628IJnz" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", 
\"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Scalar L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW Pct-of-Peak 1": 0, + "BW Pct-of-Peak 2": 1, + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "Stall 1": 6, + "Stall 2": 7, + "Util 1": 4, + "Util 2": 5 + }, + "renameByName": { + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 261, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": 
[\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\": \"&req_min\",\n \"Max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\": \"&hits_min\",\n \"Max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean\": \"&misses_avg\",\n \"Min\": \"&misses_min\",\n \"Max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean\": \"&dup_misses_avg\",\n \"Min\": \"&dup_misses_min\",\n \"Max\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": 
\"&cacheHit_avg\",\n \"Min\": \"&cacheHit_min\",\n \"Max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean\": \"&read1d_avg\",\n \"Min\": \"&read1d_min\",\n \"Max\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean\": \"&read2d_avg\",\n \"Min\": \"&read2d_min\",\n \"Max\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean\": \"&read4d_avg\",\n \"Min\": \"&read4d_min\",\n \"Max\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean\": \"&read8d_avg\",\n \"Min\": \"&read8d_min\",\n \"Max\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean\": \"&read16d_avg\",\n \"Min\": \"&read16d_min\",\n \"Max\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }}, \n 
{\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n 
\"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] 
}\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&req_avg\",\n \"Min 2\": \"&req_min\",\n \"Max 2\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&hits_avg\",\n \"Min 2\": \"&hits_min\",\n \"Max 2\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&misses_avg\",\n \"Min 2\": \"&misses_min\",\n \"Max 2\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean 2\": \"&dup_misses_avg\",\n \"Min 2\": \"&dup_misses_min\",\n \"Max 2\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&cacheHit_avg\",\n \"Min 2\": \"&cacheHit_min\",\n \"Max 2\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean 2\": \"&readReq_avg\",\n \"Min 2\": \"&readReq_min\",\n \"Max 2\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req (Total)\",\n \"Mean 2\": \"&writeReq_avg\",\n \"Min 2\": \"&writeReq_min\",\n \"Max 2\": 
\"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean 2\": \"&atomicReq_avg\",\n \"Min 2\": \"&atomicReq_min\",\n \"Max 2\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean 2\": \"&read1d_avg\",\n \"Min 2\": \"&read1d_min\",\n \"Max 2\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean 2\": \"&read2d_avg\",\n \"Min 2\": \"&read2d_min\",\n \"Max 2\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean 2\": \"&read4d_avg\",\n \"Min 2\": \"&read4d_min\",\n \"Max 2\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean 2\": \"&read8d_avg\",\n \"Min 2\": \"&read8d_min\",\n \"Max 2\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean 2\": \"&read16d_avg\",\n \"Min 2\": \"&read16d_min\",\n \"Max 2\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + 
"align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 134 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 52, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n 
},\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n 
\"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache - L2 Interface", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Scalar L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 130, + "panels": [ + { + "datasource": {}, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 132, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n 
\"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n 
\"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", 
\"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": 
\"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cylces\",\n \"avg\": 
\"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": 
[100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ 
\"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n 
\"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": 
\"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cycles\",\n \"avg\": 
\"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "TA", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 136 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 134, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, 
\"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": 
{\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": 
"${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n 
\"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n 
\"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "TD", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Texture Addresser and Texture Data (TA/TD)", + "type": "row" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 112, + "panels": [], + 
"targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Vector L1 Data Cache", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 165, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": 
[\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": 
[\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Vector L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "128B Read Combining 1": 6, + "128B Read Combining 2": 7, + "Buffer Coalescing 1": 0, + "Buffer Coalescing 2": 1, + "Cache BW 1": 2, + "Cache BW 2": 3, + "Cache Hit 1": 4, + "Cache Hit 2": 5 + }, + "renameByName": { + "128B Read Combining 1": "128B Read Combining (Current)", + "128B Read Combining 2": "128B Read Combining (Baseline)", + "Buffer Coalescing 1": "Buf Coalescing (Current)", + "Buffer Coalescing 2": "Buf Coalescing (Baseline)", + "Cache BW 1": "Cache BW (Current)", + "Cache BW 2": "Cache BW (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Cache Util 1": "Cache Util (Current)", + "Cache Util 2": "Cache Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "color-background" + }, + "decimals": 1, + "mappings": [], + "max": 
100, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 52 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 199 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 116, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { 
\"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n 
{\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + 
"rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n 
{\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": 
\"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Stalls", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true, + "unit 2": true + }, + "indexByName": { + "Max 1": 6, + "Max 2": 7, + "Mean 1": 2, + "Mean 2": 3, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 4, + "Min 2": 5, + "unit 1": 9, + "unit 2": 8 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "unit 1": "Unit" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 116 + } + ] + }, + { + "matcher": { + 
"id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 78 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 50 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 128, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ 
\"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", 
\"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n 
\"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n 
\"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": 
[${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": 
[\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n 
\"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n\n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": 
[\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n 
\"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n 
\"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Coherency", + "GroupCols": 2, + "GroupGap": 5, + "GroupLabelColor": "#FF9830", + "GroupLabelFontSize": "100%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Xfer", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": 
"blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FADE2A", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:172", + "Col": 2, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Mean", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 120, + "pluginVersion": "8.2.1", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": 
[\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n\n \"readNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \n \"writeNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_min\": {\n 
\"$min\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \n \"atomicNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n }\n }},\n 
{\"$set\": {\n \"array\": [\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"NC\",\n \"Mean\": \"&readNC_avg\",\n \"Min\": \"&readNC_min\",\n \"Max\": \"&readNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"UC\",\n \"Mean\": \"&readUC_avg\",\n \"Min\": \"&readUC_min\",\n \"Max\": \"&readUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"CC\",\n \"Mean\": \"&readCC_avg\",\n \"Min\": \"&readCC_min\",\n \"Max\": \"&readCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"RW\",\n \"Mean\": \"&readRW_avg\",\n \"Min\": \"&readRW_min\",\n \"Max\": \"&readRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"RW\",\n \"Mean\": \"&writeRW_avg\",\n \"Min\": \"&writeRW_min\",\n \"Max\": \"&writeRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"NC\",\n \"Mean\": \"&writeNC_avg\",\n \"Min\": \"&writeNC_min\",\n \"Max\": \"&writeNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"UC\",\n \"Mean\": \"&writeUC_avg\",\n \"Min\": \"&writeUC_min\",\n \"Max\": \"&writeUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"CC\",\n \"Mean\": \"&writeCC_avg\",\n \"Min\": \"&writeCC_min\",\n \"Max\": \"&writeCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"NC\",\n \"Mean\": \"&atomicNC_avg\",\n \"Min\": \"&atomicNC_min\",\n \"Max\": \"&atomicNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"UC\",\n \"Mean\": \"&atomicUC_avg\",\n \"Min\": \"&atomicUC_min\",\n \"Max\": \"&atomicUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n 
\"Coherency\": \"CC\",\n \"Mean\": \"&atomicCC_avg\",\n \"Min\": \"&atomicCC_min\",\n \"Max\": \"&atomicCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"RW\",\n \"Mean\": \"&atomicRW_avg\",\n \"Min\": \"&atomicRW_min\",\n \"Max\": \"&atomicRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D - L2 Transactions Req $normUnit", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Units" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 124, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": 
\"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": 
[\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": 
null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n 
},\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Addr Translation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Units 1": 9, + "Units 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 56, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + 
"matcher": { + "id": "byName", + "options": "L2-EA Rd BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + }, + { + "id": "color" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Util" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 100 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache Hit" + }, + "properties": [ + { + "id": "max", + "value": 100 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Wr BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 64, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", 
+ "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n 
\"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks2\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: L2 Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "L2 Util 1": 0, + "L2 Util 2": 1, + "L2-EA Rd BW 1": 4, + "L2-EA Rd BW 2": 5, + "L2-EA Wr BW 1": 6, + "L2-EA Wr BW 2": 7 + }, + "renameByName": { + "Cache Hit 1": "L2 Cache Hit (Current)", + "Cache Hit 2": "L2 Cache Hit (Baseline)", + "L2 Util 1": "L2 Util (Current)", + "L2 Util 2": "L2 Util (Baseline)", + "L2-EA Rd BW - GB/s 1": "L2-EA RD BW 
(Current)", + "L2-EA Rd BW - GB/s 2": "L2-EA RD BW (baseline)", + "L2-EA Rd BW 1": "L2-EA Rd BW (Current)", + "L2-EA Rd BW 2": "L2-EA Rd BW (Baseline)", + "L2-EA Wr BW - GB/s 1": "L2-EA WR BW (Current)", + "L2-EA Wr BW - GB/s 2": "L2-EA WR BW (Baseline)", + "L2-EA Wr BW 1": "L2-EA Wr BW (Current)", + "L2-EA Wr BW 2": "L2-EA Wr BW (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 62, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ 
$normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, 
{\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n 
\"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ 
\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": 
\"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n 
\"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n 
{\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n 
\"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", 
\"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { 
\"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n 
\"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - 
Fabric Transactions", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 178 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + } + ] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 54 + }, + "id": 58, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n 
\"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", 
\"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n\n 
\"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": 
\"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": 
{\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": 
{\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": 
{\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ 
\"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n 
\"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "L2 Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": 
"#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Transaction", + "GroupCols": 1, + "GroupGap": 5, + "GroupLabelColor": "#FADE2A", + "GroupLabelFontSize": "120%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Metric", + "LabelColor": "#ffffff", + "LabelFontSize": "80%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": 
"ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FF9830", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:81", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 60, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"ioStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_max\": {\n \"$max\": { \"$divide\": 
[\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"ioStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"creditStarvation_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_min\": {\n \"$min\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_max\": {\n \"$max\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n } \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_READ_avg\",\n \"Min\": \"&ioStall_READ_min\",\n 
\"Max\": \"&ioStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_READ_avg\",\n \"Min\": \"&gmiStall_READ_min\",\n \"Max\": \"&gmiStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_READ_avg\",\n \"Min\": \"&hbmStall_READ_min\",\n \"Max\": \"&hbmStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_WRITE_avg\",\n \"Min\": \"&ioStall_WRITE_min\",\n \"Max\": \"&ioStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_WRITE_avg\",\n \"Min\": \"&gmiStall_WRITE_min\",\n \"Max\": \"&gmiStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_WRITE_avg\",\n \"Min\": \"&hbmStall_WRITE_min\",\n \"Max\": \"&hbmStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Credit Starvation\",\n \"Transaction\": \"Write\",\n \"Target\": \"Fabric\",\n \"Avg\": \"&creditStarvation_avg\",\n \"Min\": \"&creditStarvation_min\",\n \"Max\": \"&creditStarvation_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Interface Stalls (Cycles $normUnit)", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": 
"A" + } + ], + "title": "L2 Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 66, + "panels": [ + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + 
"ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 87, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 0 - 15) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + 
"HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 92, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + 
"panelId": 70, + "refId": "A" + } + ], + "title": "Cache Hit Rate % (Channel 16 - 31) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": 
false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:565", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 81, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Read Requests(Channel 0-15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": 
false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:656", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 82, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L 2 Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": 
"Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + 
"ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:697", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 83, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, 
+ "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:750", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 84, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, 
+ "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", 
+ "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 85, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + 
"MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 91, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + 
"GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + 
"datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 189, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + 
"RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 195, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + 
"HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 191, + "targets": [ + { + "datasource": { + "type": 
"datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + 
"ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 197, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": 
"70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 193, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Request (Channel 0 - 15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": 
"last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + 
"SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 199, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "", + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + 
"LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 73 + }, + "hideTimeOverride": false, + "id": 68, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ 
$normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b0_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[0]\"]}, \n { \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b0_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[0]\"}, \"&denom\"] } \n },\n \"b0_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[0]\"}, \"&denom\"] } \n },\n \"b0_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[0]\"}, \"&denom\"] } \n },\n \"b0_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[0]\"}, \"&denom\"] } \n },\n \"b0_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[0]\"}, \"&denom\"] }\n },\n \"b0_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[0]\"}, \"&denom\"] } \n },\n \"b0_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[0]\"}, \"&denom\"] } \n },\n\n \"b0_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[0]\", \"&TCC_EA_RDREQ[0]\"]}, null] } },\n \"b0_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[0]\", \"&TCC_EA_WRREQ[0]\"]}, null] } },\n \"b0_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[0]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[0]\", \"&TCC_EA_ATOMIC[0]\"]}, null]}},\n\n \"b0_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n 
\"b0_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"}, \"&denom\"] }},\n\n \n \"b1_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[1]\"]}, \n { \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b1_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[1]\"}, \"&denom\"] } \n },\n \"b1_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[1]\"}, \"&denom\"] } \n },\n \"b1_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[1]\"}, \"&denom\"] } \n },\n \"b1_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[1]\"}, \"&denom\"] }\n },\n \"b1_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[1]\"}, \"&denom\"] } \n },\n \"b1_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[1]\", \"&TCC_EA_RDREQ[1]\"]}, null] } },\n \"b1_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_WRREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[1]\", \"&TCC_EA_WRREQ[1]\"]}, null] } },\n \"b1_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[1]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[1]\", \"&TCC_EA_ATOMIC[1]\"]}, null]}},\n\n \"b1_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"}, \"&denom\"] }},\n\n\n \"b2_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[2]\"]}, \n { \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b2_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[2]\"}, \"&denom\"] }\n },\n \"b2_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[2]\"}, \"&denom\"] } \n },\n \"b2_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[2]\"}, \"&denom\"] }\n },\n \"b2_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[2]\", \"&TCC_EA_RDREQ[2]\"]}, null] } },\n \"b2_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[2]\", \"&TCC_EA_WRREQ[2]\"]}, null] } },\n \"b2_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[2]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[2]\", \"&TCC_EA_ATOMIC[2]\"]}, null]}},\n\n \"b2_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"}, \"&denom\"] }},\n\n\n \n \"b3_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[3]\"]}, \n { \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b3_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[3]\"}, \"&denom\"] } \n },\n \"b3_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_READ[3]\"}, \"&denom\"] } \n },\n \"b3_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[3]\"}, \"&denom\"] }\n },\n \"b3_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[3]\"}, \"&denom\"] }\n },\n \"b3_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[3]\"}, \"&denom\"] } \n },\n \"b3_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[3]\", \"&TCC_EA_RDREQ[3]\"]}, null] } },\n \"b3_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[3]\", \"&TCC_EA_WRREQ[3]\"]}, null] } },\n \"b3_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[3]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[3]\", \"&TCC_EA_ATOMIC[3]\"]}, null]}},\n\n \"b3_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"}, \"&denom\"] }},\n\n\n \n \"b4_hitRate\": {\n \"$avg\":{ \n 
\"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[4]\"]}, \n { \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b4_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[4]\"}, \"&denom\"] } \n },\n \"b4_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[4]\"}, \"&denom\"] } \n },\n \"b4_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[4]\"}, \"&denom\"] }\n },\n \"b4_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[4]\", \"&TCC_EA_RDREQ[4]\"]}, null] } },\n \"b4_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[4]\", \"&TCC_EA_WRREQ[4]\"]}, null] } },\n \"b4_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[4]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[4]\", \"&TCC_EA_ATOMIC[4]\"]}, null]}},\n\n \"b4_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"}, \"&denom\"] }},\n\n\n \n \"b5_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[5]\"]}, \n { \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b5_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[5]\"}, \"&denom\"] } \n },\n \"b5_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[5]\"}, \"&denom\"] } \n },\n \"b5_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[5]\"}, \"&denom\"] } \n },\n \"b5_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[5]\", \"&TCC_EA_RDREQ[5]\"]}, null] } },\n \"b5_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[5]\", \"&TCC_EA_WRREQ[5]\"]}, null] } },\n \"b5_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[5]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[5]\", \"&TCC_EA_ATOMIC[5]\"]}, null]}},\n\n \"b5_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"}, \"&denom\"] }},\n\n\n \n \"b6_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[6]\"]}, \n { \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b6_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[6]\"}, \"&denom\"] } \n },\n \"b6_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[6]\"}, \"&denom\"] } \n },\n \"b6_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[6]\"}, \"&denom\"] } \n },\n \"b6_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[6]\"}, \"&denom\"] } \n },\n \"b6_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[6]\"}, \"&denom\"] }\n },\n \"b6_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[6]\", \"&TCC_EA_RDREQ[6]\"]}, null] } },\n \"b6_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[6]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[6]\", \"&TCC_EA_WRREQ[6]\"]}, null] } },\n \"b6_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[6]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[6]\", \"&TCC_EA_ATOMIC[6]\"]}, null]}},\n\n \"b6_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"}, \"&denom\"] }},\n\n\n \n \"b7_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[7]\"]}, \n { \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b7_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[7]\"}, \"&denom\"] } \n },\n \"b7_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[7]\"}, \"&denom\"] } \n },\n \"b7_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[7]\"}, \"&denom\"] } \n },\n \"b7_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[7]\"}, \"&denom\"] } \n },\n \"b7_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[7]\"}, 
\"&denom\"] }\n },\n \"b7_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[7]\", \"&TCC_EA_RDREQ[7]\"]}, null] } },\n \"b7_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[7]\", \"&TCC_EA_WRREQ[7]\"]}, null] } },\n \"b7_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[7]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[7]\", \"&TCC_EA_ATOMIC[7]\"]}, null]}},\n\n \"b7_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"}, \"&denom\"] }},\n\n\n \n \"b8_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[8]\"]}, \n { \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b8_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[8]\"}, \"&denom\"] } \n },\n \"b8_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[8]\"}, \"&denom\"] } \n },\n 
\"b8_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[8]\"}, \"&denom\"] } \n },\n \"b8_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[8]\", \"&TCC_EA_RDREQ[8]\"]}, null] } },\n \"b8_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[8]\", \"&TCC_EA_WRREQ[8]\"]}, null] } },\n \"b8_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[8]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[8]\", \"&TCC_EA_ATOMIC[8]\"]}, null]}},\n\n \"b8_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"}, \"&denom\"] }},\n\n\n \n \"b9_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": 
[\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[9]\"]}, \n { \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b9_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[9]\"}, \"&denom\"] } \n },\n \"b9_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[9]\"}, \"&denom\"] } \n },\n \"b9_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[9]\"}, \"&denom\"] } \n },\n \"b9_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[9]\", \"&TCC_EA_RDREQ[9]\"]}, null] } },\n \"b9_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[9]\", \"&TCC_EA_WRREQ[9]\"]}, null] } },\n \"b9_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[9]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[9]\", \"&TCC_EA_ATOMIC[9]\"]}, null]}},\n\n \"b9_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"}, \"&denom\"] }},\n\n\n \n \"b10_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[10]\"]}, \n { \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b10_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[10]\"}, \"&denom\"] } \n },\n \"b10_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[10]\"}, \"&denom\"] } \n },\n \"b10_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[10]\"}, \"&denom\"] } \n },\n \"b10_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[10]\", \"&TCC_EA_RDREQ[10]\"]}, null] } },\n \"b10_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[10]\", \"&TCC_EA_WRREQ[10]\"]}, null] } },\n \"b10_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[10]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[10]\", \"&TCC_EA_ATOMIC[10]\"]}, null]}},\n\n \"b10_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"}, \"&denom\"] }},\n\n\n \n \"b11_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[11]\"]}, \n { \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b11_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[11]\"}, \"&denom\"] } \n },\n \"b11_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[11]\"}, \"&denom\"] } \n },\n \"b11_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[11]\"}, \"&denom\"] } \n },\n \"b11_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[11]\", \"&TCC_EA_RDREQ[11]\"]}, null] } },\n \"b11_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[11]\", 0]}, 
{\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[11]\", \"&TCC_EA_WRREQ[11]\"]}, null] } },\n \"b11_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[11]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[11]\", \"&TCC_EA_ATOMIC[11]\"]}, null]}},\n\n \"b11_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"}, \"&denom\"] }},\n\n\n \n \"b12_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[12]\"]}, \n { \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b12_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[12]\"}, \"&denom\"] } \n },\n \"b12_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[12]\"}, \"&denom\"] } \n },\n \"b12_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[12]\"}, \"&denom\"] } \n },\n \"b12_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[12]\", \"&TCC_EA_RDREQ[12]\"]}, null] } },\n \"b12_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[12]\", \"&TCC_EA_WRREQ[12]\"]}, null] } },\n \"b12_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[12]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[12]\", \"&TCC_EA_ATOMIC[12]\"]}, null]}},\n\n \"b12_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"}, \"&denom\"] }},\n\n\n \n \"b13_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[13]\"]}, \n { \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b13_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[13]\"}, \"&denom\"] } \n },\n \"b13_readReq\": 
{\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[13]\"}, \"&denom\"] } \n },\n \"b13_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[13]\"}, \"&denom\"] } \n },\n \"b13_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[13]\"}, \"&denom\"] } \n },\n \"b13_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[13]\"}, \"&denom\"] }\n },\n \"b13_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[13]\", \"&TCC_EA_RDREQ[13]\"]}, null] } },\n \"b13_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[13]\", \"&TCC_EA_WRREQ[13]\"]}, null] } },\n \"b13_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[13]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[13]\", \"&TCC_EA_ATOMIC[13]\"]}, null]}},\n\n \"b13_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"}, \"&denom\"] }},\n\n\n \n \"b14_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[14]\"]}, \n { \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b14_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[14]\"}, \"&denom\"] } \n },\n \"b14_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[14]\"}, \"&denom\"] } \n },\n \"b14_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[14]\"}, \"&denom\"] } \n },\n \"b14_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[14]\", \"&TCC_EA_RDREQ[14]\"]}, null] } },\n \"b14_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[14]\", \"&TCC_EA_WRREQ[14]\"]}, null] } },\n \"b14_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[14]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[14]\", \"&TCC_EA_ATOMIC[14]\"]}, null]}},\n\n \"b14_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_io_credit\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"}, \"&denom\"] }},\n\n\n \n \"b15_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[15]\"]}, \n { \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b15_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[15]\"}, \"&denom\"] } \n },\n \"b15_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[15]\"}, \"&denom\"] } \n },\n \"b15_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[15]\"}, \"&denom\"] } \n },\n \"b15_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[15]\"}, \"&denom\"] } \n },\n \"b15_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[15]\"}, \"&denom\"] }\n },\n \"b15_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[15]\", \"&TCC_EA_RDREQ[15]\"]}, null] } },\n \"b15_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[15]\", \"&TCC_EA_WRREQ[15]\"]}, null] } },\n \"b15_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[15]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[15]\", \"&TCC_EA_ATOMIC[15]\"]}, null]}},\n\n 
\"b15_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"}, \"&denom\"] }}\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"0\",\n \"Hit Rate\": \"&b0_hitRate\",\n \"Req\": \"&b0_req\",\n \"Read Req\": \"&b0_readReq\",\n \"Write Req\": \"&b0_writeReq\",\n \"AtomicReq\": \"&b0_atomicReq\",\n \"EA Read Req\": \"&b0_eaReadReq\",\n \"EA Write Req\": \"&b0_eaWriteReq\",\n \"EA AtomicReq\": \"&b0_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b0_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b0_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b0_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b0_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b0_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b0_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b0_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b0_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b0_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b0_ea_write_stall_too_many\"\n },\n {\n \"Channel\": \"1\",\n \"Hit Rate\": \"&b1_hitRate\",\n \"Req\": \"&b1_req\",\n \"Read Req\": \"&b1_readReq\",\n \"Write Req\": 
\"&b1_writeReq\",\n \"AtomicReq\": \"&b1_atomicReq\",\n \"EA Read Req\": \"&b1_eaReadReq\",\n \"EA Write Req\": \"&b1_eaWriteReq\",\n \"EA AtomicReq\": \"&b1_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b1_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b1_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b1_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b1_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b1_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b1_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b1_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b1_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b1_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b1_ea_write_stall_too_many\"\n },\n {\n \"Channel\": \"2\",\n \"Hit Rate\": \"&b2_hitRate\",\n \"Req\": \"&b2_req\",\n \"Read Req\": \"&b2_readReq\",\n \"Write Req\": \"&b2_writeReq\",\n \"AtomicReq\": \"&b2_atomicReq\",\n \"EA Read Req\": \"&b2_eaReadReq\",\n \"EA Write Req\": \"&b2_eaWriteReq\",\n \"EA AtomicReq\": \"&b2_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b2_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b2_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b2_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b2_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b2_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b2_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b2_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b2_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b2_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b2_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"3\",\n \"Hit Rate\": \"&b3_hitRate\",\n \"Req\": \"&b3_req\",\n \"Read Req\": \"&b3_readReq\",\n \"Write Req\": \"&b3_writeReq\",\n \"AtomicReq\": \"&b3_atomicReq\",\n \"EA Read Req\": \"&b3_eaReadReq\",\n \"EA Write Req\": \"&b3_eaWriteReq\",\n \"EA AtomicReq\": \"&b3_eaAtomicReq\",\n \"EA Read Lat - cycles\": 
\"&b3_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b3_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b3_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b3_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b3_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b3_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b3_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b3_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b3_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b3_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"4\",\n \"Hit Rate\": \"&b4_hitRate\",\n \"Req\": \"&b4_req\",\n \"Read Req\": \"&b4_readReq\",\n \"Write Req\": \"&b4_writeReq\",\n \"AtomicReq\": \"&b4_atomicReq\",\n \"EA Read Req\": \"&b4_eaReadReq\",\n \"EA Write Req\": \"&b4_eaWriteReq\",\n \"EA AtomicReq\": \"&b4_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b4_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b4_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b4_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b4_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b4_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b4_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b4_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b4_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b4_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b4_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"5\",\n \"Hit Rate\": \"&b5_hitRate\",\n \"Req\": \"&b5_req\",\n \"Read Req\": \"&b5_readReq\",\n \"Write Req\": \"&b5_writeReq\",\n \"AtomicReq\": \"&b5_atomicReq\",\n \"EA Read Req\": \"&b5_eaReadReq\",\n \"EA Write Req\": \"&b5_eaWriteReq\",\n \"EA AtomicReq\": \"&b5_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b5_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b5_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b5_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b5_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": 
\"&b5_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b5_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b5_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b5_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b5_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b5_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"6\",\n \"Hit Rate\": \"&b6_hitRate\",\n \"Req\": \"&b6_req\",\n \"Read Req\": \"&b6_readReq\",\n \"Write Req\": \"&b6_writeReq\",\n \"AtomicReq\": \"&b6_atomicReq\",\n \"EA Read Req\": \"&b6_eaReadReq\",\n \"EA Write Req\": \"&b6_eaWriteReq\",\n \"EA AtomicReq\": \"&b6_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b6_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b6_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b6_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b6_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b6_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b6_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b6_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b6_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b6_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b6_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"7\",\n \"Hit Rate\": \"&b7_hitRate\",\n \"Req\": \"&b7_req\",\n \"Read Req\": \"&b7_readReq\",\n \"Write Req\": \"&b7_writeReq\",\n \"AtomicReq\": \"&b7_atomicReq\",\n \"EA Read Req\": \"&b7_eaReadReq\",\n \"EA Write Req\": \"&b7_eaWriteReq\",\n \"EA AtomicReq\": \"&b7_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b7_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b7_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b7_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b7_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b7_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b7_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b7_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": 
\"&b7_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b7_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b7_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"8\",\n \"Hit Rate\": \"&b8_hitRate\",\n \"Req\": \"&b8_req\",\n \"Read Req\": \"&b8_readReq\",\n \"Write Req\": \"&b8_writeReq\",\n \"AtomicReq\": \"&b8_atomicReq\",\n \"EA Read Req\": \"&b8_eaReadReq\",\n \"EA Write Req\": \"&b8_eaWriteReq\",\n \"EA AtomicReq\": \"&b8_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b8_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b8_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b8_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b8_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b8_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b8_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b8_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b8_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b8_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b8_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"9\",\n \"Hit Rate\": \"&b9_hitRate\",\n \"Req\": \"&b9_req\",\n \"Read Req\": \"&b9_readReq\",\n \"Write Req\": \"&b9_writeReq\",\n \"AtomicReq\": \"&b9_atomicReq\",\n \"EA Read Req\": \"&b9_eaReadReq\",\n \"EA Write Req\": \"&b9_eaWriteReq\",\n \"EA AtomicReq\": \"&b9_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b9_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b9_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b9_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b9_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b9_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b9_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b9_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b9_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b9_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b9_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"10\",\n \"Hit 
Rate\": \"&b10_hitRate\",\n \"Req\": \"&b10_req\",\n \"Read Req\": \"&b10_readReq\",\n \"Write Req\": \"&b10_writeReq\",\n \"AtomicReq\": \"&b10_atomicReq\",\n \"EA Read Req\": \"&b10_eaReadReq\",\n \"EA Write Req\": \"&b10_eaWriteReq\",\n \"EA AtomicReq\": \"&b10_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b10_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b10_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b10_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b10_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b10_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b10_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b10_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b10_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b10_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b10_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"11\",\n \"Hit Rate\": \"&b11_hitRate\",\n \"Req\": \"&b11_req\",\n \"Read Req\": \"&b11_readReq\",\n \"Write Req\": \"&b11_writeReq\",\n \"AtomicReq\": \"&b11_atomicReq\",\n \"EA Read Req\": \"&b11_eaReadReq\",\n \"EA Write Req\": \"&b11_eaWriteReq\",\n \"EA AtomicReq\": \"&b11_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b11_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b11_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b11_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b11_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b11_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b11_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b11_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b11_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b11_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b11_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"12\",\n \"Hit Rate\": \"&b12_hitRate\",\n \"Req\": \"&b12_req\",\n \"Read Req\": \"&b12_readReq\",\n \"Write Req\": \"&b12_writeReq\",\n \"AtomicReq\": \"&b12_atomicReq\",\n \"EA Read 
Req\": \"&b12_eaReadReq\",\n \"EA Write Req\": \"&b12_eaWriteReq\",\n \"EA AtomicReq\": \"&b12_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b12_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b12_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b12_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b12_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b12_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b12_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b12_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b12_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b12_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b12_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"13\",\n \"Hit Rate\": \"&b13_hitRate\",\n \"Req\": \"&b13_req\",\n \"Read Req\": \"&b13_readReq\",\n \"Write Req\": \"&b13_writeReq\",\n \"AtomicReq\": \"&b13_atomicReq\",\n \"EA Read Req\": \"&b13_eaReadReq\",\n \"EA Write Req\": \"&b13_eaWriteReq\",\n \"EA AtomicReq\": \"&b13_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b13_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b13_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b13_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b13_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b13_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b13_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b13_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b13_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b13_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b13_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"14\",\n \"Hit Rate\": \"&b14_hitRate\",\n \"Req\": \"&b14_req\",\n \"Read Req\": \"&b14_readReq\",\n \"Write Req\": \"&b14_writeReq\",\n \"AtomicReq\": \"&b14_atomicReq\",\n \"EA Read Req\": \"&b14_eaReadReq\",\n \"EA Write Req\": \"&b14_eaWriteReq\",\n \"EA AtomicReq\": \"&b14_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b14_eaReadLat\",\n \"EA 
Write Lat - cycles\": \"&b14_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b14_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b14_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b14_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b14_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b14_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b14_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b14_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b14_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"15\",\n \"Hit Rate\": \"&b15_hitRate\",\n \"Req\": \"&b15_req\",\n \"Read Req\": \"&b15_readReq\",\n \"Write Req\": \"&b15_writeReq\",\n \"AtomicReq\": \"&b15_atomicReq\",\n \"EA Read Req\": \"&b15_eaReadReq\",\n \"EA Write Req\": \"&b15_eaWriteReq\",\n \"EA AtomicReq\": \"&b15_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b15_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b15_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b15_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b15_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b15_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b15_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b15_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b15_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b15_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b15_ea_write_stall_too_many\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + 
"FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": 
"70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 73 + }, + "id": 70, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b16_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[16]\"]}, \n { \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b16_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[16]\"}, \"&denom\"] } \n },\n \"b16_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[16]\"}, \"&denom\"] } \n },\n \"b16_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[16]\"}, \"&denom\"] } \n },\n \"b16_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[16]\"}, \"&denom\"] } \n },\n \"b16_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[16]\"}, \"&denom\"] }\n },\n \"b16_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ[16]\"}, \"&denom\"] } \n },\n \"b16_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[16]\"}, \"&denom\"] } \n },\n\n \"b16_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[16]\", \"&TCC_EA_RDREQ[16]\"]}, null] } },\n \"b16_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[16]\", \"&TCC_EA_WRREQ[16]\"]}, null] } },\n \"b16_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[16]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[16]\", \"&TCC_EA_ATOMIC[16]\"]}, null]}},\n \"b16_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"}, \"&denom\"] }},\n\n \n \"b17_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[17]\"]}, \n { \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b17_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[17]\"}, \"&denom\"] } \n },\n \"b17_readReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_READ[17]\"}, \"&denom\"] } \n },\n \"b17_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[17]\"}, \"&denom\"] } \n },\n \"b17_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[17]\"}, \"&denom\"] }\n },\n \"b17_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[17]\"}, \"&denom\"] } \n },\n \"b17_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[17]\", \"&TCC_EA_RDREQ[17]\"]}, null] } },\n \"b17_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[17]\", \"&TCC_EA_WRREQ[17]\"]}, null] } },\n \"b17_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[17]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[17]\", \"&TCC_EA_ATOMIC[17]\"]}, null]}},\n \"b17_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"}, \"&denom\"] }},\n\n \n \"b18_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[18]\"]}, \n { \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b18_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[18]\"}, \"&denom\"] }\n },\n \"b18_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[18]\"}, \"&denom\"] } \n },\n \"b18_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[18]\"}, \"&denom\"] }\n },\n \"b18_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[18]\", \"&TCC_EA_RDREQ[18]\"]}, null] } },\n \"b18_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[18]\", \"&TCC_EA_WRREQ[18]\"]}, null] } },\n \"b18_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[18]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[18]\", \"&TCC_EA_ATOMIC[18]\"]}, null]}},\n \"b18_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"}, \"&denom\"] }},\n\n \n \"b19_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[19]\"]}, \n { \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b19_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[19]\"}, \"&denom\"] } \n },\n \"b19_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[19]\"}, \"&denom\"] } \n },\n \"b19_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[19]\"}, \"&denom\"] }\n },\n \"b19_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[19]\"}, \"&denom\"] }\n },\n \"b19_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[19]\"}, \"&denom\"] } \n },\n \"b19_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[19]\", \"&TCC_EA_RDREQ[19]\"]}, null] } },\n \"b19_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[19]\", \"&TCC_EA_WRREQ[19]\"]}, null] } },\n \"b19_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[19]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[19]\", \"&TCC_EA_ATOMIC[19]\"]}, null]}},\n 
\"b19_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"}, \"&denom\"] }},\n\n \n \"b20_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[20]\"]}, \n { \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b20_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[20]\"}, \"&denom\"] } \n },\n \"b20_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[20]\"}, \"&denom\"] } \n },\n \"b20_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[20]\"}, \"&denom\"] }\n },\n \"b20_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[20]\", \"&TCC_EA_RDREQ[20]\"]}, null] } },\n \"b20_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[20]\", \"&TCC_EA_WRREQ[20]\"]}, null] } },\n \"b20_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[20]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[20]\", \"&TCC_EA_ATOMIC[20]\"]}, null]}},\n \"b20_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"}, \"&denom\"] }},\n\n \n\n \"b21_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[21]\"]}, \n { \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b21_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[21]\"}, \"&denom\"] } \n },\n \"b21_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[21]\"}, \"&denom\"] } \n },\n \"b21_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[21]\"}, \"&denom\"] } \n },\n \"b21_atomicReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[21]\", \"&TCC_EA_RDREQ[21]\"]}, null] } },\n \"b21_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[21]\", \"&TCC_EA_WRREQ[21]\"]}, null] } },\n \"b21_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[21]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[21]\", \"&TCC_EA_ATOMIC[21]\"]}, null]}},\n \"b21_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"}, \"&denom\"] }},\n\n \n\n \"b22_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[22]\"]}, \n { \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b22_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[22]\"}, \"&denom\"] } \n },\n \"b22_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[22]\"}, \"&denom\"] } \n },\n \"b22_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[22]\"}, \"&denom\"] } \n },\n \"b22_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[22]\"}, \"&denom\"] } \n },\n \"b22_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[22]\"}, \"&denom\"] }\n },\n \"b22_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[22]\", \"&TCC_EA_RDREQ[22]\"]}, null] } },\n \"b22_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[22]\", \"&TCC_EA_WRREQ[22]\"]}, null] } },\n \"b22_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[22]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[22]\", \"&TCC_EA_ATOMIC[22]\"]}, null]}},\n \"b22_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n 
\"b22_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"}, \"&denom\"] }},\n\n \n\n \"b23_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[23]\"]}, \n { \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b23_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[23]\"}, \"&denom\"] } \n },\n \"b23_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[23]\"}, \"&denom\"] } \n },\n \"b23_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[23]\"}, \"&denom\"] } \n },\n \"b23_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[23]\"}, \"&denom\"] } \n },\n \"b23_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[23]\"}, \"&denom\"] }\n },\n \"b23_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[23]\", \"&TCC_EA_RDREQ[23]\"]}, null] } },\n \"b23_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[23]\", \"&TCC_EA_WRREQ[23]\"]}, null] } },\n \"b23_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[23]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[23]\", \"&TCC_EA_ATOMIC[23]\"]}, null]}},\n \"b23_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"}, \"&denom\"] }},\n\n \n \"b24_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[24]\"]}, \n { \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b24_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[24]\"}, \"&denom\"] } \n },\n \"b24_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[24]\"}, \"&denom\"] } \n },\n \"b24_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[24]\"}, \"&denom\"] } \n },\n \"b24_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[24]\", \"&TCC_EA_RDREQ[24]\"]}, null] } },\n \"b24_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[24]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[24]\", \"&TCC_EA_WRREQ[24]\"]}, null] } },\n \"b24_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[24]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[24]\", \"&TCC_EA_ATOMIC[24]\"]}, null]}},\n \"b24_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"}, \"&denom\"] }},\n\n \n \"b25_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[25]\"]}, \n { \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b25_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[25]\"}, \"&denom\"] } \n },\n \"b25_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[25]\"}, \"&denom\"] } \n },\n \"b25_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[25]\"}, \"&denom\"] } \n },\n \"b25_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[25]\", \"&TCC_EA_RDREQ[25]\"]}, null] } },\n \"b25_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[25]\", \"&TCC_EA_WRREQ[25]\"]}, null] } },\n \"b25_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[25]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[25]\", \"&TCC_EA_ATOMIC[25]\"]}, null]}},\n \"b25_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"}, \"&denom\"] }},\n\n \n \"b26_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[26]\"]}, \n { \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b26_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[26]\"}, \"&denom\"] } \n },\n \"b26_readReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[26]\"}, \"&denom\"] } \n },\n \"b26_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[26]\"}, \"&denom\"] } \n },\n \"b26_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[26]\", \"&TCC_EA_RDREQ[26]\"]}, null] } },\n \"b26_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[26]\", \"&TCC_EA_WRREQ[26]\"]}, null] } },\n \"b26_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[26]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[26]\", \"&TCC_EA_ATOMIC[26]\"]}, null]}},\n \"b26_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"}, \"&denom\"] }},\n\n \n \"b27_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[27]\"]}, \n { \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b27_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[27]\"}, \"&denom\"] } \n },\n \"b27_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[27]\"}, \"&denom\"] } \n },\n \"b27_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[27]\"}, \"&denom\"] } \n },\n \"b27_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[27]\", \"&TCC_EA_RDREQ[27]\"]}, null] } },\n \"b27_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[27]\", \"&TCC_EA_WRREQ[27]\"]}, null] } },\n \"b27_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[27]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[27]\", \"&TCC_EA_ATOMIC[27]\"]}, null]}},\n \"b27_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"}, \"&denom\"] }},\n\n \n \"b28_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[28]\"]}, \n { \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b28_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[28]\"}, \"&denom\"] } \n },\n \"b28_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[28]\"}, \"&denom\"] } \n },\n \"b28_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[28]\"}, \"&denom\"] } \n },\n \"b28_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[28]\", \"&TCC_EA_RDREQ[28]\"]}, null] } },\n \"b28_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[28]\", \"&TCC_EA_WRREQ[28]\"]}, null] } },\n \"b28_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[28]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[28]\", \"&TCC_EA_ATOMIC[28]\"]}, null]}},\n 
\"b28_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"}, \"&denom\"] }},\n\n \n \"b29_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[29]\"]}, \n { \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b29_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[29]\"}, \"&denom\"] } \n },\n \"b29_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[29]\"}, \"&denom\"] } \n },\n \"b29_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[29]\"}, \"&denom\"] } \n },\n \"b29_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[29]\"}, \"&denom\"] } \n },\n \"b29_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[29]\"}, \"&denom\"] }\n },\n \"b29_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[29]\", \"&TCC_EA_RDREQ[29]\"]}, null] } },\n \"b29_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[29]\", \"&TCC_EA_WRREQ[29]\"]}, null] } },\n \"b29_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[29]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[29]\", \"&TCC_EA_ATOMIC[29]\"]}, null]}},\n \"b29_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"}, \"&denom\"] }},\n\n \n \"b30_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[30]\"]}, \n { \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b30_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[30]\"}, \"&denom\"] } \n },\n \"b30_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[30]\"}, \"&denom\"] } \n },\n \"b30_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[30]\"}, \"&denom\"] } \n },\n \"b30_atomicReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[30]\", \"&TCC_EA_RDREQ[30]\"]}, null] } },\n \"b30_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[30]\", \"&TCC_EA_WRREQ[30]\"]}, null] } },\n \"b30_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[30]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[30]\", \"&TCC_EA_ATOMIC[30]\"]}, null]}},\n \"b30_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"}, \"&denom\"] }},\n\n \n \"b31_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[31]\"]}, \n { \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b31_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[31]\"}, \"&denom\"] } \n },\n \"b31_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[31]\"}, \"&denom\"] } \n },\n \"b31_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[31]\"}, \"&denom\"] } \n },\n \"b31_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[31]\"}, \"&denom\"] } \n },\n \"b31_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}, \"&denom\"] }\n },\n \"b31_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[31]\", \"&TCC_EA_RDREQ[31]\"]}, null] } },\n \"b31_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[31]\", \"&TCC_EA_WRREQ[31]\"]}, null] } },\n \"b31_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[31]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[31]\", \"&TCC_EA_ATOMIC[31]\"]}, null]}},\n \"b31_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n 
\"b31_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}, \"&denom\"] }}\n\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"16\",\n \"Hit Rate\": \"&b16_hitRate\",\n \"Req\": \"&b16_req\",\n \"Read Req\": \"&b16_readReq\",\n \"Write Req\": \"&b16_writeReq\",\n \"AtomicReq\": \"&b16_atomicReq\",\n \"EA Read Req\": \"&b16_eaReadReq\",\n \"EA Write Req\": \"&b16_eaWriteReq\",\n \"EA AtomicReq\": \"&b16_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b16_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b16_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b16_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b16_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b16_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b16_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b16_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b16_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b16_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b16_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"17\",\n \"Hit Rate\": \"&b17_hitRate\",\n \"Req\": \"&b17_req\",\n \"Read Req\": \"&b17_readReq\",\n \"Write Req\": \"&b17_writeReq\",\n \"AtomicReq\": \"&b17_atomicReq\",\n \"EA Read Req\": \"&b17_eaReadReq\",\n \"EA Write Req\": \"&b17_eaWriteReq\",\n \"EA AtomicReq\": \"&b17_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b17_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b17_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b17_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b17_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b17_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b17_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b17_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b17_ea_write_stall_gmi_credit\",\n 
\"EA Write Stall - DRAM\": \"&b17_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b17_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"18\",\n \"Hit Rate\": \"&b18_hitRate\",\n \"Req\": \"&b18_req\",\n \"Read Req\": \"&b18_readReq\",\n \"Write Req\": \"&b18_writeReq\",\n \"AtomicReq\": \"&b18_atomicReq\",\n \"EA Read Req\": \"&b18_eaReadReq\",\n \"EA Write Req\": \"&b18_eaWriteReq\",\n \"EA AtomicReq\": \"&b18_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b18_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b18_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b18_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b18_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b18_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b18_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b18_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b18_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b18_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b18_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"19\",\n \"Hit Rate\": \"&b19_hitRate\",\n \"Req\": \"&b19_req\",\n \"Read Req\": \"&b19_readReq\",\n \"Write Req\": \"&b19_writeReq\",\n \"AtomicReq\": \"&b19_atomicReq\",\n \"EA Read Req\": \"&b19_eaReadReq\",\n \"EA Write Req\": \"&b19_eaWriteReq\",\n \"EA AtomicReq\": \"&b19_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b19_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b19_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b19_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b19_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b19_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b19_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b19_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b19_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b19_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b19_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"20\",\n \"Hit 
Rate\": \"&b20_hitRate\",\n \"Req\": \"&b20_req\",\n \"Read Req\": \"&b20_readReq\",\n \"Write Req\": \"&b20_writeReq\",\n \"AtomicReq\": \"&b20_atomicReq\",\n \"EA Read Req\": \"&b20_eaReadReq\",\n \"EA Write Req\": \"&b20_eaWriteReq\",\n \"EA AtomicReq\": \"&b20_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b20_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b20_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b20_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b20_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b20_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b20_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b20_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b20_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b20_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b20_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"21\",\n \"Hit Rate\": \"&b21_hitRate\",\n \"Req\": \"&b21_req\",\n \"Read Req\": \"&b21_readReq\",\n \"Write Req\": \"&b21_writeReq\",\n \"AtomicReq\": \"&b21_atomicReq\",\n \"EA Read Req\": \"&b21_eaReadReq\",\n \"EA Write Req\": \"&b21_eaWriteReq\",\n \"EA AtomicReq\": \"&b21_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b21_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b21_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b21_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b21_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b21_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b21_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b21_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b21_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b21_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b21_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"22\",\n \"Hit Rate\": \"&b22_hitRate\",\n \"Req\": \"&b22_req\",\n \"Read Req\": \"&b22_readReq\",\n \"Write Req\": \"&b22_writeReq\",\n \"AtomicReq\": \"&b22_atomicReq\",\n \"EA Read 
Req\": \"&b22_eaReadReq\",\n \"EA Write Req\": \"&b22_eaWriteReq\",\n \"EA AtomicReq\": \"&b22_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b22_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b22_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b22_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b22_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b22_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b22_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b22_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b22_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b22_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b22_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"23\",\n \"Hit Rate\": \"&b23_hitRate\",\n \"Req\": \"&b23_req\",\n \"Read Req\": \"&b23_readReq\",\n \"Write Req\": \"&b23_writeReq\",\n \"AtomicReq\": \"&b23_atomicReq\",\n \"EA Read Req\": \"&b23_eaReadReq\",\n \"EA Write Req\": \"&b23_eaWriteReq\",\n \"EA AtomicReq\": \"&b23_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b23_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b23_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b23_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b23_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b23_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b23_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b23_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b23_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b23_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b23_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"24\",\n \"Hit Rate\": \"&b24_hitRate\",\n \"Req\": \"&b24_req\",\n \"Read Req\": \"&b24_readReq\",\n \"Write Req\": \"&b24_writeReq\",\n \"AtomicReq\": \"&b24_atomicReq\",\n \"EA Read Req\": \"&b24_eaReadReq\",\n \"EA Write Req\": \"&b24_eaWriteReq\",\n \"EA AtomicReq\": \"&b24_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b24_eaReadLat\",\n \"EA 
Write Lat - cycles\": \"&b24_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b24_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b24_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b24_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b24_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b24_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b24_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b24_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b24_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"25\",\n \"Hit Rate\": \"&b25_hitRate\",\n \"Req\": \"&b25_req\",\n \"Read Req\": \"&b25_readReq\",\n \"Write Req\": \"&b25_writeReq\",\n \"AtomicReq\": \"&b25_atomicReq\",\n \"EA Read Req\": \"&b25_eaReadReq\",\n \"EA Write Req\": \"&b25_eaWriteReq\",\n \"EA AtomicReq\": \"&b25_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b25_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b25_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b25_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b25_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b25_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b25_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b25_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b25_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b25_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b25_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"26\",\n \"Hit Rate\": \"&b26_hitRate\",\n \"Req\": \"&b26_req\",\n \"Read Req\": \"&b26_readReq\",\n \"Write Req\": \"&b26_writeReq\",\n \"AtomicReq\": \"&b26_atomicReq\",\n \"EA Read Req\": \"&b26_eaReadReq\",\n \"EA Write Req\": \"&b26_eaWriteReq\",\n \"EA AtomicReq\": \"&b26_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b26_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b26_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b26_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b26_ea_read_stall_io_credit\",\n \"EA Read 
Stall - GMI\": \"&b26_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b26_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b26_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b26_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b26_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b26_ea_write_stall_too_many\"\n\n\n },\n {\n \"Channel\": \"27\",\n \"Hit Rate\": \"&b27_hitRate\",\n \"Req\": \"&b27_req\",\n \"Read Req\": \"&b27_readReq\",\n \"Write Req\": \"&b27_writeReq\",\n \"AtomicReq\": \"&b27_atomicReq\",\n \"EA Read Req\": \"&b27_eaReadReq\",\n \"EA Write Req\": \"&b27_eaWriteReq\",\n \"EA AtomicReq\": \"&b27_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b27_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b27_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b27_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b27_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b27_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b27_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b27_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b27_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b27_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b27_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"28\",\n \"Hit Rate\": \"&b28_hitRate\",\n \"Req\": \"&b28_req\",\n \"Read Req\": \"&b28_readReq\",\n \"Write Req\": \"&b28_writeReq\",\n \"AtomicReq\": \"&b28_atomicReq\",\n \"EA Read Req\": \"&b28_eaReadReq\",\n \"EA Write Req\": \"&b28_eaWriteReq\",\n \"EA AtomicReq\": \"&b28_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b28_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b28_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b28_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b28_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b28_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b28_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": 
\"&b28_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b28_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b28_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b28_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"29\",\n \"Hit Rate\": \"&b29_hitRate\",\n \"Req\": \"&b29_req\",\n \"Read Req\": \"&b29_readReq\",\n \"Write Req\": \"&b29_writeReq\",\n \"AtomicReq\": \"&b29_atomicReq\",\n \"EA Read Req\": \"&b29_eaReadReq\",\n \"EA Write Req\": \"&b29_eaWriteReq\",\n \"EA AtomicReq\": \"&b29_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b29_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b29_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b29_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b29_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b29_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b29_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b29_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b29_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b29_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b29_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"30\",\n \"Hit Rate\": \"&b30_hitRate\",\n \"Req\": \"&b30_req\",\n \"Read Req\": \"&b30_readReq\",\n \"Write Req\": \"&b30_writeReq\",\n \"AtomicReq\": \"&b30_atomicReq\",\n \"EA Read Req\": \"&b30_eaReadReq\",\n \"EA Write Req\": \"&b30_eaWriteReq\",\n \"EA AtomicReq\": \"&b30_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b30_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b30_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b30_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b30_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b30_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b30_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b30_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b30_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b30_ea_write_stall_dram_credit\",\n \"EA 
Write Stall - Starve\": \"&b30_ea_write_stall_too_many\"\n\n },\n {\n \"Channel\": \"31\",\n \"Hit Rate\": \"&b31_hitRate\",\n \"Req\": \"&b31_req\",\n \"Read Req\": \"&b31_readReq\",\n \"Write Req\": \"&b31_writeReq\",\n \"AtomicReq\": \"&b31_atomicReq\",\n \"EA Read Req\": \"&b31_eaReadReq\",\n \"EA Write Req\": \"&b31_eaWriteReq\",\n \"EA AtomicReq\": \"&b31_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b31_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b31_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b31_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b31_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b31_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b31_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b31_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b31_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b31_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b31_ea_write_stall_too_many\"\n\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 16-31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 
1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 81 + }, + "id": 93, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + 
], + "title": "L2 - EA Write Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": 
false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 81 + }, + "id": 94, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + 
"LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 89 + }, + "id": 187, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + 
"CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + 
"ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 89 + }, + "id": 201, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + 
"LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 97 + }, + "id": 220, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": 
false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": 
"top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 97 + }, + "id": 227, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + 
"MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 221, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + 
"GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], 
+ "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 105 + }, + "id": 228, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + 
"RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 222, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": 
"#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 113 + }, + "id": 229, + "targets": [ + { + 
"datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": 
true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 121 + }, + "id": 223, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + 
"LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 121 + }, + "id": 230, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", 
+ "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + 
"ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 129 + }, + "id": 225, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": 
[], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 129 + }, + "id": 231, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": 
"YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + 
"TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 137 + }, + "id": 224, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + 
"LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 137 + }, + "id": 232, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + 
"FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": 
"70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 145 + }, + "id": 226, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + 
"MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 145 + }, + "id": 233, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache (per Channel)", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 34, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + "hide": 0, + "includeAll": false, + "label": "Normalization", + "multi": false, + 
"name": "normUnit", + "options": [ + { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + { + "selected": false, + "text": "\"per Cycle\"", + "value": "\"per Cycle\"" + }, + { + "selected": false, + "text": "\"per Sec\"", + "value": "\"per Sec\"" + }, + { + "selected": false, + "text": "\"per Kernel\"", + "value": "\"per Kernel\"" + } + ], + "query": "\"per Wave\",\n\"per Cycle\",\n\"per Sec\",\n\"per Kernel\"", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "L2 Channels", + "multi": false, + "name": "L2Banks", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SEs", + "multi": false, + "name": "numSE", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "110", + "value": "110" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#CUs", + "multi": false, + "name": "numCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + 
"value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SCLK (MHz)", + "multi": false, + "name": "sclk", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SQC", + "multi": false, + "name": "numSQC", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "HBM BW (GB/s)", + "multi": false, + "name": "hbmBW", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "miperf_ast_reproducer_swatomics_mi200", + "value": 
"miperf_ast_reproducer_swatomics_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Workload", + "multi": false, + "name": "Workload1", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1528823703", + "value": "1528823703" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Kernel Cycles", + "multi": false, + "name": "kernelBusyCycles", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": 
"110", + "value": "110" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Active CUs", + "multi": false, + "name": "numActiveCUs", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": 
"All", + "value": "$__all" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Filtered Dispatch ID", + "multi": false, + "name": "DispatchIDFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. (1|18)", + "hide": 0, + "label": "Dispatch Filter", + "name": "DispatchID", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "GCD", + "multi": false, + "name": "gpuFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Kernels", + "multi": true, + "name": "KernelNameFilter", + "options": [], + "query": 
"$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "miperf_asw_vcopy_mi200", + "value": "miperf_asw_vcopy_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline Workload", + "multi": false, + "name": "Workload2", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "84", + "value": "84" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline ActiveCUs", + "multi": false, + "name": "numActiveCUs2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n 
}},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Baseline Dispatch IDs", + "multi": false, + "name": "DispatchIDFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID2:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. 
(1|18)", + "hide": 0, + "label": "Baseline Dispatch Filter", + "name": "DispatchID2", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline GCD", + "multi": false, + "name": "gpuFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Baseline Kernels", + "multi": true, + "name": "KernelNameFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "System Info" + ], + "value": [ + "System Info" + ] + }, + "hide": 0, + "includeAll": false, + "label": "Comparison Panels", + "multi": true, + "name": "select", + "options": [ + { + "selected": true, + "text": "System Info", + "value": "System Info" + }, + { + "selected": false, + "text": "System Speed-of-Light", + "value": "System Speed-of-Light" + }, + { + "selected": false, + "text": "Roofline", + "value": "Roofline" + }, + { + "selected": false, + "text": "Command Processor", + "value": "Command Processor" + }, + { + "selected": false, + "text": "Shader Processor Input", + "value": "Shader Processor Input" + }, + { + 
"selected": false, + "text": "Wavefront", + "value": "Wavefront" + }, + { + "selected": false, + "text": "Compute Pipeline", + "value": "Compute Pipeline" + }, + { + "selected": false, + "text": "Instruction Mix", + "value": "Instruction Mix" + }, + { + "selected": false, + "text": "Local Data Share", + "value": "Local Data Share" + }, + { + "selected": false, + "text": "Instruction Cache", + "value": "Instruction Cache" + }, + { + "selected": false, + "text": "Scalar L1D Cache", + "value": "Scalar L1D Cache" + }, + { + "selected": false, + "text": "Texture Addr and Data", + "value": "Texture Addr and Data" + }, + { + "selected": false, + "text": "Vector L1D Cache", + "value": "Vector L1D Cache" + }, + { + "selected": false, + "text": "L2 Cache", + "value": "L2 Cache" + } + ], + "query": "System Info, \nSystem Speed-of-Light, \nRoofline,\nCommand Processor, \nShader Processor Input, \nWavefront,\nCompute Pipeline, \nInstruction Mix,\nLocal Data Share, \nInstruction Cache, \nScalar L1D Cache, \nTexture Addr and Data, \nVector L1D Cache,\nL2 Cache", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline L2 Channels", + "multi": false, + "name": "L2Banks2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SEs", + "multi": false, + "name": "numSE2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": 
\"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "110", + "value": "110" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #CUs", + "multi": false, + "name": "numCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline SCLK (MHz)", + "multi": false, + "name": "sclk2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SQC", + "multi": false, + "name": "numSQC2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 
1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline HBM BW (GB/s)", + "multi": false, + "name": "hbmBW2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "mi200", + "value": "mi200" + }, + "definition": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SOC", + "multi": false, + "name": "soc", + "options": [], + "query": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "5", + "value": "5" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "TopN", + "options": [ + { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "20", + "value": "20" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "query": "1,5,10,15,20,50,100", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2021-11-04T14:21:39.749Z", + "to": "2021-11-08T14:21:39.749Z" + }, + "timepicker": {}, + "timezone": "", + "title": "rocprofiler-compute_v1.0.5_pub", + "uid": "MIPerf_v1_0_0630202211", + "version": 3, + "weekStart": "" 
+} diff --git a/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.7_pub.json b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.7_pub.json new file mode 100644 index 0000000000..399488c482 --- /dev/null +++ b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.7_pub.json @@ -0,0 +1,13325 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 42, + "iteration": 1674838967359, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 217, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 23, + "w": 13, + "x": 0, + "y": 1 + }, + "id": 159, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.sysinfo.aggregate([\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"command\": 1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n 
\"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"App Command\",\n \"Value\": \"&command\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n 
\"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.sysinfo.aggregate([\n {\"$match\": {\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(System Info)\"}}\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"command\": 1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"App Command\",\n \"Value\": \"&command\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache 
Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "System Info", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true + }, + "indexByName": {}, + "renameByName": { + "Value 1": "Current", + "Value 2": "Baseline" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Info", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 108, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "decimals": 0, + "links": [], + "mappings": [ + { + "options": { + "match": "false", + "result": { + "index": 0 + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text" + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Percent of Peak - PoP" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", 
+ "value": 80 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.width", + "value": 252 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit 1" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 137 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 125 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 161 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 15, + "x": 0, + "y": 2 + }, + "id": 110, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n },\n\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { 
\"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": 
[100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ 
\"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs,\n \"Unit\": \"CUs\",\n \"peak\": $numCU,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { 
\"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n },\n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] 
}\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch 
BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }}\n\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": 
[\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2, 4] }] }\n },\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { 
\"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": 
\"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs2,\n \"Unit\": \"CUs\",\n \"peak\": $numCU2,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs2] }, $numCU2]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 
100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk2, $numCU2, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": 
\"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n }, \n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n 
\"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU2, $numCU2] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU2, $numCU2] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC2, { 
\"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]\n }}\n\n ]);", + "type": "table" + } + ], + "title": "Speed of Light", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Metric 1": 0, + "Metric 2": 7, + "Percent of Peak - PoP 1": 5, + "Percent of Peak - PoP 2": 6, + "Unit 1": 8, + "Unit 2": 9, + "Value 1": 1, + "Value 2": 2, + "peak 1": 3, + "peak 2": 4 + }, + "renameByName": { + "Percent of Peak - PoP": "Pct-of-Peak", + "Percent of Peak - PoP 1": "Pct-of-Peak (Current)", + "Percent of Peak - PoP 2": "Pct-of-Peak (Baseline)", + "Unit": "", + "Value": "Avg", + "Value 1": "Avg (Current)", + "Value 2": "Avg (Baseline)", + "peak": "Theoretical Max", + "peak 1": "Theoretical Max (Current)", + "peak 2": "Theoretical Max (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 175, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n],\n{ allowDiskUse: true }\n);", + "type": "table" + } + ], + "title": "Dispatch IDs - Current", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 215, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Dispatch IDs - Baseline", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": 
"table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Speed-of-Light", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 36, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 157, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + } + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "u5Z2zJhnk" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"elapsedTime1\": {\n \"$divide\": [{\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}, 1000]\n }\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"elapsedTime1\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Kernel Time Histogram", + "transparent": true, + "type": "histogram" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + 
"matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 123 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Performance" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Peak FLOPs" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 213, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "L1 Cache (Bytes)" + } + ] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": 
"A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": 
[\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128 ]} \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", 
\"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n \n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n }}\n]);", + "type": "table" + } + ], + "title": "Top Kernels", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Name", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA 
FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "peak_flops": "Peak FLOPs", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS " + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 87 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 153 + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS (Bytes)" + }, + "properties": [ + { + "id": "custom.width", + "value": 98 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + }, + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": 
{ + "id": "byName", + "options": "Dispatch" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 251, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&Index\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n 
\"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n\n }}\n]);", + "type": "table" + } + ], + "title": "Top Dispatches", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, 
+ "peak_flops": 19, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS ", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Dispatch", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Kernel Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 40, + "panels": [ + { + "description": "All transaction units default to Billion, when per-sec norm is used", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 285, + "options": { + "addAllIDs": false, + "captureMappings": false, + "eventAutoComplete": true, + "eventSource": "options.animateLogo(svgmap, data);\r\nconsole.log(\"Starting render\");\r\nlet buff = data.series[0].fields[2].values.buffer;\r\nlet valueCount = buff.length;\r\nconsole.log(\"The buff is \", valueCount, \" 
long\");\r\n\r\nsvgmap.wave_life_.text(buff[0]);\r\nsvgmap.active_cu_.text(buff[1]);\r\nsvgmap.salu_.text(buff[2]);\r\nsvgmap.smem_.text(buff[3]);\r\nsvgmap.valu_.text(buff[4]);\r\nsvgmap.mfma_.text(buff[5]);\r\nsvgmap.vmem_.text(buff[6]);\r\nsvgmap.lds_.text(buff[7]);\r\nsvgmap.gws_.text(buff[8]);\r\nsvgmap.br_.text(buff[9]);\r\nsvgmap.vgpr_.text(buff[10]);\r\nsvgmap.sgpr_.text(buff[11]);\r\nsvgmap.lds_alloc_.text(buff[12]);\r\nsvgmap.scratch_alloc_.text(buff[13]);\r\nsvgmap.wavefronts_.text(buff[14]);\r\nsvgmap.workgroups_.text(buff[15]);\r\nsvgmap.lds_req_.text(buff[16]);\r\nsvgmap.il1_fetch_.text(buff[17]);\r\nsvgmap.il1_hit_.text(buff[18]);\r\nsvgmap.il1_l2_rd_.text(buff[19]);\r\nsvgmap.sl1_rd_.text(buff[20]);\r\nsvgmap.sl1_hit_.text(buff[21]);\r\nsvgmap.sl1_l2_rd_.text(buff[22]);\r\nsvgmap.sl1_l2_wr_.text(buff[23]);\r\nsvgmap.sl1_l2_atom_.text(buff[24]);\r\nsvgmap.vl1_rd_.text(buff[25]);\r\nsvgmap.vl1_wr_.text(buff[26]);\r\nsvgmap.vl1_atom_.text(buff[27]);\r\nsvgmap.vl1_hit_.text(buff[28]);\r\nsvgmap.vl1_lat_.text(buff[29]);\r\nsvgmap.vl1_l2_rd_.text(buff[30]);\r\nsvgmap.vl1_l2_wr_.text(buff[31]);\r\nsvgmap.vl1_l2_atom_.text(buff[32]);\r\nsvgmap.l2_rd_.text(buff[33]);\r\nsvgmap.l2_wr_.text(buff[34])\r\nsvgmap.l2_atom_.text(buff[35]);\r\nsvgmap.l2_hit_.text(buff[36]);\r\nsvgmap.l2_rd_lat_.text(buff[37]);\r\nsvgmap.l2_wr_lat_.text(buff[38]);\r\nsvgmap.fabric_rd_lat_.text(buff[39]);\r\nsvgmap.fabric_wr_lat_.text(buff[40]);\r\nsvgmap.fabric_atom_lat_.text(buff[41]);\r\nsvgmap.l2_fabric_rd_.text(buff[42]);\r\nsvgmap.l2_fabric_wr_.text(buff[43]);\r\nsvgmap.l2_fabric_atom_.text(buff[44]);\r\nsvgmap.hbm_rd_.text(buff[45]);\r\nsvgmap.hbm_wr_.text(buff[46]);\r\nsvgmap.lds_util_.text(buff[47]);\r\nsvgmap.vl1_coales_.text(buff[48]);\r\nsvgmap.vl1_stall_.text(buff[49]);\r\nsvgmap.wave_occ_.text(buff[50]);\r\nsvgmap.lds_lat_.text(buff[51]);\r\nsvgmap.il1_lat_.text(buff[52]);\r\nsvgmap.sl1_lat_.text(buff[53]);\r\nsvgmap.gds_req_.text(buff[54]);", + "initAutoComplete": true, 
+ "initSource": "options.animateLogo = (svgmap, data) => {\r\n \r\n}\r\n ", + "svgMappings": [ + { + "mappedName": "wave_life_", + "svgId": "wave_life" + }, + { + "mappedName": "wave_occ_", + "svgId": "wave_occ" + }, + { + "mappedName": "salu_", + "svgId": "salu" + }, + { + "mappedName": "smem_", + "svgId": "smem" + }, + { + "mappedName": "valu_", + "svgId": "valu" + }, + { + "mappedName": "mfma_", + "svgId": "mfma" + }, + { + "mappedName": "vmem_", + "svgId": "vmem" + }, + { + "mappedName": "lds_", + "svgId": "lds" + }, + { + "mappedName": "gws_", + "svgId": "gws" + }, + { + "mappedName": "br_", + "svgId": "br" + }, + { + "mappedName": "active_cu_", + "svgId": "active_cu" + }, + { + "mappedName": "vgpr_", + "svgId": "vgpr" + }, + { + "mappedName": "sgpr_", + "svgId": "sgpr" + }, + { + "mappedName": "lds_alloc_", + "svgId": "lds_alloc" + }, + { + "mappedName": "scratch_alloc_", + "svgId": "scratch_alloc" + }, + { + "mappedName": "wavefronts_", + "svgId": "wavefronts" + }, + { + "mappedName": "workgroups_", + "svgId": "workgroups" + }, + { + "mappedName": "lds_req_", + "svgId": "lds_req" + }, + { + "mappedName": "vl1_wr_", + "svgId": "vl1_wr" + }, + { + "mappedName": "vl1_atom_", + "svgId": "vl1_atom" + }, + { + "mappedName": "sl1_rd_", + "svgId": "sl1_rd" + }, + { + "mappedName": "il1_fetch_", + "svgId": "il1_fetch" + }, + { + "mappedName": "lds_lat_", + "svgId": "lds_lat" + }, + { + "mappedName": "lds_bw_", + "svgId": "lds_bw" + }, + { + "mappedName": "lds_util_", + "svgId": "lds_util" + }, + { + "mappedName": "vl1_hit_", + "svgId": "vl1_hit" + }, + { + "mappedName": "vl1_lat_", + "svgId": "vl1_lat" + }, + { + "mappedName": "vl1_coales_", + "svgId": "vl1_coales" + }, + { + "mappedName": "vl1_stall_", + "svgId": "vl1_stall" + }, + { + "mappedName": "sl1_hit_", + "svgId": "sl1_hit" + }, + { + "mappedName": "sl1_lat_", + "svgId": "sl1_lat" + }, + { + "mappedName": "il1_hit_", + "svgId": "il1_hit" + }, + { + "mappedName": "il1_lat_", + "svgId": "il1_lat" + }, + { + 
"mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "il1_l2_rd_", + "svgId": "il1_l2_rd" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "l2_rd_", + "svgId": "l2_rd" + }, + { + "mappedName": "l2_wr_", + "svgId": "l2_wr" + }, + { + "mappedName": "l2_atom_", + "svgId": "l2_atom" + }, + { + "mappedName": "l2_hit_", + "svgId": "l2_hit" + }, + { + "mappedName": "l2_rd_lat_", + "svgId": "l2_rd_lat" + }, + { + "mappedName": "l2_wr_lat_", + "svgId": "l2_wr_lat" + }, + { + "mappedName": "l2_fabric_rd_", + "svgId": "l2_fabric_rd" + }, + { + "mappedName": "l2_fabric_wr_", + "svgId": "l2_fabric_wr" + }, + { + "mappedName": "l2_fabric_atom_", + "svgId": "l2_fabric_atom" + }, + { + "mappedName": "fabric_rd_lat_", + "svgId": "fabric_rd_lat" + }, + { + "mappedName": "fabric_wr_lat_", + "svgId": "fabric_wr_lat" + }, + { + "mappedName": "fabric_atom_lat_", + "svgId": "fabric_atom_lat" + }, + { + "mappedName": "fabric_hbm_rd_", + "svgId": "fabric_hbm_rd" + }, + { + "mappedName": "fabric_hbm_wr_", + "svgId": "fabric_hbm_wr" + }, + { + "mappedName": "vl1_rd_", + "svgId": "vl1_rd" + }, + { + "mappedName": "vl1_l2_rd_", + "svgId": "vl1_l2_rd" + }, + { + "mappedName": "vl1_l2_wr_", + "svgId": "vl1_l2_wr" + }, + { + "mappedName": "vl1_l2_atom_", + "svgId": "vl1_l2_atom" + }, + { + "mappedName": "hbm_rd_", + "svgId": "hbm_rd" + }, + { + "mappedName": "hbm_wr_", + "svgId": "hbm_wr" + } + ], + "svgSource": "\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n Wave Occupancy\r\n \r\n Wave Life\r\n \r\n \r\n \r\n xGMI /\r\n PCIe\r\n \r\n GMI\r\n \r\n HBM\r\n \r\n Fabric\r\n \r\n \r\n SALU:\r\n 00000\r\n \r\n \r\n SMEM:\r\n 00000\r\n \r\n \r\n VALU:\r\n 00000\r\n \r\n \r\n MFMA:\r\n 00000\r\n \r\n \r\n VMEM:\r\n 00000\r\n \r\n \r\n LDS:\r\n 00000\r\n \r\n \r\n GWS:\r\n 00000\r\n \r\n \r\n Br:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n \r\n \r\n cycles\r\n Atomic:\r\n 00000\r\n \r\n \r\n Rd:\r\n 00000\r\n \r\n \r\n Wr:\r\n \r\n \r\n 00000\r\n \r\n \r\n Atomic:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n 00000\r\n Rd:\r\n 00000\r\n Wr:\r\n 00000\r\n Req:\r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n per-GCD\r\n cycles\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n Wave 0 Instr buff\r\n Wave N-1 Instr buff\r\n Active CUs\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Util:\r\n 00000\r\n \r\n \r\n %\r\n Coales:\r\n 00000\r\n Exec\r\n Instr Buff\r\n Instr Dispatch\r\n LDS\r\n Vector L1 Cache\r\n Scalar L1D Cache\r\n Instr L1 Cache\r\n L2 Cache\r\n 00000\r\n Req:\r\n \r\n \r\n %\r\n Stall:\r\n 00000\r\n 00000\r\n Fetch:\r\n 0000000\r\n 00000\r\n 000/000\r\n \r\n Latency\r\n \r\n LDS Alloc:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n \r\n Scratch Alloc:\r\n \r\n 00000\r\n \r\n Wavefronts:\r\n \r\n 00000\r\n \r\n Workgroups:\r\n \r\n 00000\r\n \r\n VGPRs:\r\n \r\n 00000\r\n \r\n SGPRs:\r\n \r\n 00000\r\n \r\n \r\n 00000\r\n Rd:\r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n Latency\r\n \r\n \r\n \r\n \r\n Text is not SVG - cannot display\r\n \r\n \r\n" + }, + "pluginVersion": "8.4.0", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n },\n \"br\": {\n 
\"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&vgpr\"\n },\n \"sgpr\": {\n \"$avg\": \"&sgpr\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&lds\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&scr\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": 
[\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$numCU\"]}\n },\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": \"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n 
\"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": \"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n 
\"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": \"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { \"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n \"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n 
{\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_LEVEL_WAVES", + "target": "$Workload1.SQ_LEVEL_WAVES.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_occ\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\",\"&GRBM_GUI_ACTIVE\"] }, $numActiveCUs]}\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Alias\": \"wave_occ_\",\n \"Value\":{ \"$round\": [\"&wave_occ\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n 
\"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "$Workload1.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"lds_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&SQ_INSTS_LDS\", 0] },\n { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\"] },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"LDS Lat\",\n \"Alias\": \"lds_lat_\",\n \"Value\":{ \"$round\": [\"&lds_lat\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_ICACHE_INFLIGHT", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"il1_lat\": 
{\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_ICACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_ICACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"IL1 Lat\",\n \t\t\t\"Alias\": \"il1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&il1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_DCACHE_INFLIGHT_LEVEL", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"sl1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_DCACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_DCACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"vL1D Lat\",\n \t\t\t\"Alias\": \"sl1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&sl1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n 
\t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + } + ], + "title": "Memory Chart (Normalization: $normUnit)", + "transformations": [ + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "string", + "targetField": "Value" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "amd-custom-svg" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Memory Chart Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 241, + "panels": [ + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 253, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm" + }, + "name": "HBM-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": 
"#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2" + }, + "name": "L2-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1" + }, + "name": "vL1D-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS" + }, + "name": "LDS-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + 
"mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": 
"#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA" + }, + "name": "HBM-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": 
true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA" + }, + "name": "L2-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA" + }, + "name": "vL1D-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA" + }, + "name": "LDS-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "rawQuery": true, + "refId": "HBM-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { 
\"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": 
"LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"&high_flop\"\n }\n },\n\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ 
\"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": 
[\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n 
\"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": 
\"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n 
\"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP32/FP64 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + }, + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 312, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": 
false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": 
"base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_fp16" + }, + "name": "HBM-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_fp16" + }, + "name": "L2-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + 
"line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_fp16" + }, + "name": "vL1D-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_fp16" + }, + "name": "LDS-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_i8" + }, + "name": "HBM-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false 
+ } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_i8" + }, + "name": "L2-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_i8" + }, + "name": "vL1D-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_i8" + }, + "name": "LDS-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n 
\"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": 
[${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { 
\"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } 
},\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + 
"rawQuery": true, + "refId": "L2-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n 
{\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n 
\"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP16/INT8 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Roofline Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + 
"uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 2, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 13, + "x": 0, + "y": 6 + }, + "id": 6, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpfBusy_avg\": 
{\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": 
[\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n 
\"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": 
\"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] 
}, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": 
\"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Fetcher", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, 
+ "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 171 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 147 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 11, + "x": 13, + "y": 6 + }, + "id": 4, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpcBusy_avg\": {\"$avg\": 
\"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": 
\"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n 
}},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n 
\"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": 
{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": 
[100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ 
\"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": 
\"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Compute", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 
2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Metric 1": "", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Command Processor (CPC/CPF)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 102, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 101 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 145 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 97 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + 
{ + "id": "custom.width", + "value": 123 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 106, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n 
\"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n 
\"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" 
+ }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 285 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 242 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 104, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": 
\"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": 
\"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n 
\"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky 
Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, 
\"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": 
\"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n 
}},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": 
\"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Resource Allocation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Shader Processor Input (SPI)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 185, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + 
"properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 142 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 196 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 174 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.width", + "value": 168 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min" + }, + "properties": [ + { + "id": "custom.width", + "value": 272 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 225 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "interval": "12h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": 
\"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": 
\"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": 
\"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Wavefront Launch Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true, + "Units 2": true, + "metric 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 223 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 34, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { 
\"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n 
\"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n 
\"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": 
\"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", 
\"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "Wavefront Runtime Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg": "", + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "", + "Unit 2": "" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Wavefront", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 209, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 12, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + 
"valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM\",\n 
\"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n \n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n 
{\n \"metric\": \"VALU - Vector (Baseline)\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Instruction Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 24, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 24, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "limit": 100, + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n 
{\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n 
\"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": 
[\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA 
(Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL (Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VALU Arithmetic Instr Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 275, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": 
{\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n 
\"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n 
\"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VMEM Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "VMEM Instr", + "type 1": "VMEM Instr" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 16, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] 
},\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mmfa_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mmfa_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mmfa_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mmfa_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mmfa_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mmfa_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction 
Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mmfa_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mmfa_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mmfa_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mmfa_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mmfa_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mmfa_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "MFMA Arithmetic Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "MFMA Instr", + "type 1": "MFMA Instr" + } + } + } + ], + "transparent": true, + "type": "table" + } + ], + "targets": [ + { + "datasource": { 
+ "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Instruction Mix", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 8, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 211, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ 
\"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": 
[\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n\n \"instr_val\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Compute Pipeline", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "mfma_flops_bf16_pop 1": 4, + "mfma_flops_bf16_pop 2": 5, + "mfma_flops_f16_pop 1": 6, + "mfma_flops_f16_pop 2": 7, + "mfma_flops_f32_pop 1": 8, + "mfma_flops_f32_pop 2": 9, + "mfma_flops_f64_pop 1": 10, + "mfma_flops_f64_pop 2": 11, + "mfma_flops_i8_pop 1": 12, + "mfma_flops_i8_pop 2": 13, + "valu_flops_pop 
1": 0, + "valu_flops_pop 2": 1, + "valu_iops_pop 1": 2, + "valu_iops_pop 2": 3 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "flops_pop": "FLOPs", + "flops_pop 1": "FLOPs (Current)", + "flops_pop 2": "FLOPs (Baseline)", + "iops_pop": "IOPs", + "iops_pop 1": "IOPs (Current)", + "iops_pop 2": "IOPs (Baseline)", + "mfma_flops_bf16_pop": "MFMA- BF16 (FLOPs)", + "mfma_flops_bf16_pop 1": "MFMA-BF16 (Cur)", + "mfma_flops_bf16_pop 2": "MFMA-BF16 (Baseline)", + "mfma_flops_f16_pop": "MFMA-F16 (FLOPs)", + "mfma_flops_f16_pop 1": "MFMA-F16 (Cur)", + "mfma_flops_f16_pop 2": "MFMA-F16 (Baseline)", + "mfma_flops_f32_pop": "MFMA-F32 (FLOPs)", + "mfma_flops_f32_pop 1": "MFMA-F32 (Cur)", + "mfma_flops_f32_pop 2": "MFMA-F32 (Baseline)", + "mfma_flops_f64_pop": "MFMA-F64 (FLOPs)", + "mfma_flops_f64_pop 1": "MFMA-F64 (Cur)", + "mfma_flops_f64_pop 2": "MFMA-F64 (Baseline)", + "mfma_flops_i8_pop": "MFMA-i8 (IOPs)", + "mfma_flops_i8_pop 1": "MFMA-I8 (Cur)", + "mfma_flops_i8_pop 2": "MFMA-I8 (Baseline)", + "valu_flops_pop": "VALU (FLOPs)", + "valu_flops_pop 1": "VALU FLOPs (Cur)", + "valu_flops_pop 2": "VALU FLOPs (Baseline)", + "valu_iops_pop": "VALU (IOPs)", + "valu_iops_pop 1": "VALU IOPs (Cur)", + "valu_iops_pop 2": "VALU IOPs (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 257, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, 
\"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ 
\"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg\": \"&avg_ipcAvg\",\n \"Min\": \"&min_ipcAvg\",\n \"Max\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg\": \"&avg_ipcIssue\",\n \"Min\": \"&min_ipcIssue\",\n \"Max\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg\": \"&avg_saluUtil\",\n \"Min\": \"&min_saluUtil\",\n \"Max\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg\": \"&avg_valuUtil\",\n \"Min\": \"&min_valuUtil\",\n \"Max\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg\": \"&avg_unpredthreads_val\",\n \"Min\": \"&min_unpredthreads_val\",\n \"Max\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA 
Util\",\n \"Avg\": \"&avg_mfmaUtil\",\n \"Min\": \"&min_mfmaUtil\",\n \"Max\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg\": \"&avg_mfmaInstrCycles\",\n \"Min\": \"&min_mfmaInstrCycles\",\n \"Max\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", 
\"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 
0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg 2\": \"&avg_ipcAvg\",\n \"Min 2\": \"&min_ipcAvg\",\n \"Max 2\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg 2\": \"&avg_ipcIssue\",\n \"Min 2\": \"&min_ipcIssue\",\n \"Max 2\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg 2\": \"&avg_saluUtil\",\n \"Min 2\": \"&min_saluUtil\",\n \"Max 2\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg 2\": \"&avg_valuUtil\",\n \"Min 2\": \"&min_valuUtil\",\n \"Max 2\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg 2\": \"&avg_unpredthreads_val\",\n \"Min 2\": \"&min_unpredthreads_val\",\n \"Max 2\": \"&max_unpredthreads_val\",\n \"Unit\": 
\"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg 2\": \"&avg_mfmaUtil\",\n \"Min 2\": \"&min_mfmaUtil\",\n \"Max 2\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg 2\": \"&avg_mfmaInstrCycles\",\n \"Min 2\": \"&min_mfmaInstrCycles\",\n \"Max 2\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Pipeline Stats", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg 2": "Avg (Baseline)", + "Max 2": "Max (Baseline)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + 
"value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 96, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n 
{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n 
\"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": 
[\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n 
},\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { 
\"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n 
\"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": 
[\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n 
},\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Arithmetic Operations", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 255, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM", + "target": "${Workload1}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": 
[\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg\": \"&avg_vmemLat\",\n \"Min\": \"&min_vmemLat\",\n \"Max\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM", + "target": "${Workload1}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg\":\"&avg_smemLat\",\n \"Min\":\"&min_smemLat\",\n \"Max\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL", 
+ "target": "${Workload1}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg\":\"&avg_instrFetchLat\",\n \"Min\":\"&min_instrFetchLat\",\n \"Max\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "${Workload1}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n 
\"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg\":\"&avg_ldsLat\",\n \"Min\":\"&min_ldsLat\",\n \"Max\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg 2\": \"&avg_vmemLat\",\n \"Min 2\": \"&min_vmemLat\",\n \"Max 2\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n 
\"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg 2\":\"&avg_smemLat\",\n \"Min 2\":\"&min_smemLat\",\n \"Max 2\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL2", + "target": "${Workload2}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg 2\":\"&avg_instrFetchLat\",\n \"Min 2\":\"&min_instrFetchLat\",\n \"Max 2\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": 
"table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS2", + "target": "${Workload2}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg 2\":\"&avg_ldsLat\",\n \"Min 2\":\"&min_ldsLat\",\n \"Max 2\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + } + ], + "title": "Memory Latencies", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Compute Pipeline", + "type": "row" + }, + { + "collapsed": true, 
+ "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 98, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 205, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ 
\n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, 
\"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n }\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: LDS", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Access Rate 1": 6, + "Access Rate 2": 7, + "Bandwith (Pct-of-Peak) 1": 0, + "Bandwith (Pct-of-Peak) 2": 1, + "Bank Conflict Rate 1": 2, + "Bank Conflict Rate 2": 3, + "Utilization 1": 4, + "Utilization 2": 5 + }, + "renameByName": { + "Access Rate 1": "Access Rate (Current)", + "Access Rate 2": "Access Rate (Baseline)", + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "Utilization 1": "Util (Current)", + "Utilization 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "min": 
-100000000000000000000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 141 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n 
\"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": 
[\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n 
\"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": 
\"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n 
\"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , 
\"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 
4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", 
$normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "LDS Stats", + "transformations": [ + { + "id": "concatenate", + "options": { + "frameNameLabel": "frame", + "frameNameMode": "field" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max 
(Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Local Data Share (LDS)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 44, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 48, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", 
\"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\" ] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Instruction Cache ", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW (Pct-of-Peak) 1": 4, + "BW (Pct-of-Peak) 2": 5, + "Cache Hit 1": 6, + "Cache Hit 2": 7, + "Stall 1": 2, + "Stall 2": 3, + "Util 1": 0, + "Util 2": 1 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "BW Pct-of-Peak 1": "BW 
Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 259, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": 
[\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&avg_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&avg_hits\",\n \"Min\": \"&min_hits\",\n \"Max\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n 
\"Mean\": \"&avg_misses\",\n \"Min\": \"&min_misses\",\n \"Max\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean\": \"&avg_misses_dup\",\n \"Min\": \"&min_misses_dup\",\n \"Max\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n \n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&avg_cacheHit\",\n \"Min\": \"&min_cacheHit\",\n \"Max\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n 
\"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&avg_req\",\n \"Min 2\": \"&min_req\",\n \"Max 2\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&avg_hits\",\n \"Min 2\": \"&min_hits\",\n \"Max 2\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&avg_misses\",\n \"Min 2\" : \"&min_misses\",\n \"Max 2\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean 2\": \"&avg_misses_dup\",\n \"Min 2\": \"&min_misses_dup\",\n \"Max 2\": \"&max_misses_dup\",\n 
\"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&avg_cacheHit\",\n \"Min 2\": \"&min_cacheHit\",\n \"Max 2\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Instruction Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "L1I Metric": "", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Instruction Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 203, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L1K-TC BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 54, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { 
+ "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "sY628IJnz" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, 
\n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Scalar L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW Pct-of-Peak 1": 0, + "BW Pct-of-Peak 2": 1, + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "Stall 1": 6, + "Stall 2": 7, + "Util 1": 4, + "Util 2": 5 + }, + "renameByName": { + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 
261, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", 
\"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { 
\"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\": \"&req_min\",\n \"Max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\": \"&hits_min\",\n \"Max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n 
\"Mean\": \"&misses_avg\",\n \"Min\": \"&misses_min\",\n \"Max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean\": \"&dup_misses_avg\",\n \"Min\": \"&dup_misses_min\",\n \"Max\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&cacheHit_avg\",\n \"Min\": \"&cacheHit_min\",\n \"Max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean\": \"&read1d_avg\",\n \"Min\": \"&read1d_min\",\n \"Max\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean\": \"&read2d_avg\",\n \"Min\": \"&read2d_min\",\n \"Max\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean\": \"&read4d_avg\",\n \"Min\": \"&read4d_min\",\n \"Max\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean\": \"&read8d_avg\",\n \"Min\": \"&read8d_min\",\n \"Max\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean\": \"&read16d_avg\",\n \"Min\": \"&read16d_min\",\n \"Max\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + 
"rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }}, \n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] 
}] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": 
{\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&req_avg\",\n \"Min 2\": \"&req_min\",\n \"Max 2\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&hits_avg\",\n \"Min 2\": \"&hits_min\",\n \"Max 2\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&misses_avg\",\n \"Min 2\": \"&misses_min\",\n \"Max 2\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean 2\": \"&dup_misses_avg\",\n \"Min 2\": \"&dup_misses_min\",\n \"Max 2\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&cacheHit_avg\",\n \"Min 2\": \"&cacheHit_min\",\n \"Max 
2\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean 2\": \"&readReq_avg\",\n \"Min 2\": \"&readReq_min\",\n \"Max 2\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req (Total)\",\n \"Mean 2\": \"&writeReq_avg\",\n \"Min 2\": \"&writeReq_min\",\n \"Max 2\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean 2\": \"&atomicReq_avg\",\n \"Min 2\": \"&atomicReq_min\",\n \"Max 2\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean 2\": \"&read1d_avg\",\n \"Min 2\": \"&read1d_min\",\n \"Max 2\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean 2\": \"&read2d_avg\",\n \"Min 2\": \"&read2d_min\",\n \"Max 2\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean 2\": \"&read4d_avg\",\n \"Min 2\": \"&read4d_min\",\n \"Max 2\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean 2\": \"&read8d_avg\",\n \"Min 2\": \"&read8d_min\",\n \"Max 2\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean 2\": \"&read16d_avg\",\n \"Min 2\": \"&read16d_min\",\n \"Max 2\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + 
"renameByName": { + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 134 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 52, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per 
Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n 
{\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": 
[\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache - L2 Interface", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + 
"type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Scalar L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 130, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 132, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n 
\"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { 
\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ 
\"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": 
\"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": 
\"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cycles\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n 
\"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n 
\"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n 
\"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": 
\"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": 
{\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cycles\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "TA", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + 
"matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 136 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 134, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { 
\"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n 
\"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", 
$normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n 
\"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n 
\"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "TD", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + 
"refId": "A" + } + ], + "title": "Texture Addresser and Texture Data (TA/TD)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 112, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 165, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n 
{\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n 
{\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Vector L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "128B Read Combining 1": 6, + "128B Read Combining 2": 7, + "Buffer Coalescing 1": 0, + "Buffer Coalescing 2": 1, + "Cache BW 1": 2, + "Cache BW 2": 3, + "Cache Hit 1": 4, + "Cache Hit 2": 5 + }, + "renameByName": { + "128B Read Combining 1": "128B Read Combining (Current)", + "128B Read Combining 2": "128B Read Combining(Baseline)", + "Buffer Coalescing 1": "Buf Coalescing (Current)", + "Buffer Coalescing 2": "Buf Coalescing (Baseline)", + "Cache BW 1": "Cache BW (Current)", + "Cache BW 2": "Cache BW (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Cache Util 1": "Cache Util (Current)", + "Cache Util 2": "Cache Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + 
"custom": { + "align": "auto", + "displayMode": "color-background" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 52 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 199 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 116, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": 
{\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n 
\"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n 
\"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": 
\"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Stalls", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true, + "unit 2": true + }, + "indexByName": { + "Max 1": 6, + "Max 2": 7, + "Mean 1": 2, + "Mean 2": 3, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 4, + "Min 2": 5, + "unit 1": 9, + "unit 2": 8 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "unit 1": "Unit" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + 
"properties": [ + { + "id": "custom.width", + "value": 116 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 78 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 50 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 128, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n 
{\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n 
\"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", 
\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ 
\"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": 
[\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n 
\"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n 
\"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ 
\"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n 
\"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n\n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": 
[\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n 
\"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n
\"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Coherency", + "GroupCols": 2, + "GroupGap": 5, + "GroupLabelColor": "#FF9830", + "GroupLabelFontSize": "100%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Xfer", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": 
"blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FADE2A", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:172", + "Col": 2, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Mean", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 120, + "pluginVersion": "8.2.1", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": 
[\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n\n \"readNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \n \"writeNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_min\": {\n 
\"$min\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \n \"atomicNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n }\n }},\n 
{\"$set\": {\n \"array\": [\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&readNC_avg\",\n \"Min\": \"&readNC_min\",\n \"Max\": \"&readNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&readUC_avg\",\n \"Min\": \"&readUC_min\",\n \"Max\": \"&readUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&readCC_avg\",\n \"Min\": \"&readCC_min\",\n \"Max\": \"&readCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&readRW_avg\",\n \"Min\": \"&readRW_min\",\n \"Max\": \"&readRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&writeRW_avg\",\n \"Min\": \"&writeRW_min\",\n \"Max\": \"&writeRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&writeNC_avg\",\n \"Min\": \"&writeNC_min\",\n \"Max\": \"&writeNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&writeUC_avg\",\n \"Min\": \"&writeUC_min\",\n \"Max\": \"&writeUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&writeCC_avg\",\n \"Min\": \"&writeCC_min\",\n \"Max\": \"&writeCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&atomicNC_avg\",\n \"Min\": \"&atomicNC_min\",\n \"Max\": \"&atomicNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&atomicUC_avg\",\n \"Min\": \"&atomicUC_min\",\n \"Max\": \"&atomicUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": 
\"CC\",\n \"Avg\": \"&atomicCC_avg\",\n \"Min\": \"&atomicCC_min\",\n \"Max\": \"&atomicCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&atomicRW_avg\",\n \"Min\": \"&atomicRW_min\",\n \"Max\": \"&atomicRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D - L2 Transactions Req $normUnit", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Units" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 124, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n 
\"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": 
[\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": 
null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n 
},\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Addr Translation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Units 1": 9, + "Units 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Vector L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 56, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + 
"steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + }, + { + "id": "color" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Util" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 100 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache Hit" + }, + "properties": [ + { + "id": "max", + "value": 100 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Wr BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 64, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + 
"showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n 
\"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks2\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: L2 Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "L2 Util 1": 0, + "L2 Util 2": 1, + "L2-EA Rd BW 1": 4, + "L2-EA Rd BW 2": 5, + "L2-EA Wr BW 1": 6, + "L2-EA Wr BW 2": 7 + }, + "renameByName": { + "Cache Hit 1": "L2 Cache Hit (Current)", + 
"Cache Hit 2": "L2 Cache Hit (Baseline)", + "L2 Util 1": "L2 Util (Current)", + "L2 Util 2": "L2 Util (Baseline)", + "L2-EA Rd BW - GB/s 1": "L2-EA RD BW (Current)", + "L2-EA Rd BW - GB/s 2": "L2-EA RD BW (baseline)", + "L2-EA Rd BW 1": "L2-EA Rd BW (Current)", + "L2-EA Rd BW 2": "L2-EA Rd BW (Baseline)", + "L2-EA Wr BW - GB/s 1": "L2-EA WR BW (Current)", + "L2-EA Wr BW - GB/s 2": "L2-EA WR BW (Baseline)", + "L2-EA Wr BW 1": "L2-EA Wr BW (Current)", + "L2-EA Wr BW 2": "L2-EA Wr BW (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 62, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n 
\"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n 
\"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n 
\"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ 
\"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n 
null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n 
{\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n 
null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } 
] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n 
\"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": 
{\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": 
[\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": 
\"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Transactions", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 178 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + } + ] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 58, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + 
"targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ 
\"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": 
{\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ 
\"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": 
\"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ 
\"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ 
\"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n 
\"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", 
$normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "L2 Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": 
"last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Transaction", + "GroupCols": 1, + "GroupGap": 5, + "GroupLabelColor": "#FADE2A", + "GroupLabelFontSize": "120%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Metric", + "LabelColor": "#ffffff", + "LabelFontSize": "80%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, 
+ "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FF9830", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:81", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 60, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"ioStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_max\": {\n \"$max\": { 
\"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"ioStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"creditStarvation_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_min\": {\n \"$min\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_max\": {\n \"$max\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n } \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_READ_avg\",\n \"Min\": 
\"&ioStall_READ_min\",\n \"Max\": \"&ioStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_READ_avg\",\n \"Min\": \"&gmiStall_READ_min\",\n \"Max\": \"&gmiStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_READ_avg\",\n \"Min\": \"&hbmStall_READ_min\",\n \"Max\": \"&hbmStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_WRITE_avg\",\n \"Min\": \"&ioStall_WRITE_min\",\n \"Max\": \"&ioStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_WRITE_avg\",\n \"Min\": \"&gmiStall_WRITE_min\",\n \"Max\": \"&gmiStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_WRITE_avg\",\n \"Min\": \"&hbmStall_WRITE_min\",\n \"Max\": \"&hbmStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Credit Starvation\",\n \"Transaction\": \"Write\",\n \"Target\": \"Fabric\",\n \"Avg\": \"&creditStarvation_avg\",\n \"Min\": \"&creditStarvation_min\",\n \"Max\": \"&creditStarvation_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Interface Stalls (Cycles $normUnit)", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": 
"oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 66, + "panels": [ + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, 
+ "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 87, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 0 - 15) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + 
"HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 92, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- 
Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 16 - 31) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": 
true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:565", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 81, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Read Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + 
"LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:656", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 82, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + 
"BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + 
"ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:697", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 83, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + 
"LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:750", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 84, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + 
"FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + 
"ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 85, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + 
"MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 91, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + 
"GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + 
"Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 189, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 
255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 195, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + 
"HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 191, + 
"targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + 
"ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 197, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": 
"Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 193, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Request (Channel 0 - 15): $normUnit", + 
"type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + 
"ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 199, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "", + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + 
"LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 73 + }, + "hideTimeOverride": false, + "id": 68, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n 
\"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b0_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[0]\"]}, \n { \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b0_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[0]\"}, \"&denom\"] } \n },\n \"b0_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[0]\"}, \"&denom\"] } \n },\n \"b0_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[0]\"}, \"&denom\"] } \n },\n \"b0_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[0]\"}, \"&denom\"] } \n },\n \"b0_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[0]\"}, \"&denom\"] }\n },\n \"b0_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[0]\"}, \"&denom\"] } \n },\n \"b0_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[0]\"}, \"&denom\"] } \n },\n\n \"b0_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[0]\", \"&TCC_EA_RDREQ[0]\"]}, null] } },\n \"b0_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[0]\", \"&TCC_EA_WRREQ[0]\"]}, null] } },\n \"b0_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[0]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[0]\", \"&TCC_EA_ATOMIC[0]\"]}, null]}},\n\n \"b0_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"}, 
\"&denom\"] }},\n \"b0_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"}, \"&denom\"] }},\n\n \n \"b1_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[1]\"]}, \n { \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b1_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[1]\"}, \"&denom\"] } \n },\n \"b1_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[1]\"}, \"&denom\"] } \n },\n \"b1_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[1]\"}, \"&denom\"] } \n },\n \"b1_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[1]\"}, \"&denom\"] }\n },\n \"b1_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[1]\"}, \"&denom\"] } \n },\n \"b1_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[1]\", \"&TCC_EA_RDREQ[1]\"]}, null] } },\n \"b1_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_WRREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[1]\", \"&TCC_EA_WRREQ[1]\"]}, null] } },\n \"b1_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[1]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[1]\", \"&TCC_EA_ATOMIC[1]\"]}, null]}},\n\n \"b1_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"}, \"&denom\"] }},\n\n\n \"b2_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[2]\"]}, \n { \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b2_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[2]\"}, \"&denom\"] }\n },\n \"b2_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[2]\"}, \"&denom\"] } \n },\n \"b2_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[2]\"}, \"&denom\"] }\n },\n \"b2_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[2]\", \"&TCC_EA_RDREQ[2]\"]}, null] } },\n \"b2_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[2]\", \"&TCC_EA_WRREQ[2]\"]}, null] } },\n \"b2_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[2]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[2]\", \"&TCC_EA_ATOMIC[2]\"]}, null]}},\n\n \"b2_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"}, \"&denom\"] }},\n\n\n \n \"b3_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[3]\"]}, \n { \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b3_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[3]\"}, \"&denom\"] } \n },\n \"b3_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_READ[3]\"}, \"&denom\"] } \n },\n \"b3_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[3]\"}, \"&denom\"] }\n },\n \"b3_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[3]\"}, \"&denom\"] }\n },\n \"b3_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[3]\"}, \"&denom\"] } \n },\n \"b3_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[3]\", \"&TCC_EA_RDREQ[3]\"]}, null] } },\n \"b3_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[3]\", \"&TCC_EA_WRREQ[3]\"]}, null] } },\n \"b3_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[3]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[3]\", \"&TCC_EA_ATOMIC[3]\"]}, null]}},\n\n \"b3_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"}, \"&denom\"] }},\n\n\n \n \"b4_hitRate\": {\n \"$avg\":{ \n 
\"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[4]\"]}, \n { \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b4_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[4]\"}, \"&denom\"] } \n },\n \"b4_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[4]\"}, \"&denom\"] } \n },\n \"b4_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[4]\"}, \"&denom\"] }\n },\n \"b4_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[4]\", \"&TCC_EA_RDREQ[4]\"]}, null] } },\n \"b4_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[4]\", \"&TCC_EA_WRREQ[4]\"]}, null] } },\n \"b4_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[4]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[4]\", \"&TCC_EA_ATOMIC[4]\"]}, null]}},\n\n \"b4_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"}, \"&denom\"] }},\n\n\n \n \"b5_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[5]\"]}, \n { \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b5_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[5]\"}, \"&denom\"] } \n },\n \"b5_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[5]\"}, \"&denom\"] } \n },\n \"b5_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[5]\"}, \"&denom\"] } \n },\n \"b5_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[5]\", \"&TCC_EA_RDREQ[5]\"]}, null] } },\n \"b5_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[5]\", \"&TCC_EA_WRREQ[5]\"]}, null] } },\n \"b5_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[5]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[5]\", \"&TCC_EA_ATOMIC[5]\"]}, null]}},\n\n \"b5_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"}, \"&denom\"] }},\n\n\n \n \"b6_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[6]\"]}, \n { \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b6_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[6]\"}, \"&denom\"] } \n },\n \"b6_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[6]\"}, \"&denom\"] } \n },\n \"b6_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[6]\"}, \"&denom\"] } \n },\n \"b6_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[6]\"}, \"&denom\"] } \n },\n \"b6_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[6]\"}, \"&denom\"] }\n },\n \"b6_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[6]\", \"&TCC_EA_RDREQ[6]\"]}, null] } },\n \"b6_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[6]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[6]\", \"&TCC_EA_WRREQ[6]\"]}, null] } },\n \"b6_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[6]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[6]\", \"&TCC_EA_ATOMIC[6]\"]}, null]}},\n\n \"b6_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"}, \"&denom\"] }},\n\n\n \n \"b7_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[7]\"]}, \n { \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b7_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[7]\"}, \"&denom\"] } \n },\n \"b7_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[7]\"}, \"&denom\"] } \n },\n \"b7_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[7]\"}, \"&denom\"] } \n },\n \"b7_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[7]\"}, \"&denom\"] } \n },\n \"b7_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[7]\"}, 
\"&denom\"] }\n },\n \"b7_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[7]\", \"&TCC_EA_RDREQ[7]\"]}, null] } },\n \"b7_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[7]\", \"&TCC_EA_WRREQ[7]\"]}, null] } },\n \"b7_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[7]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[7]\", \"&TCC_EA_ATOMIC[7]\"]}, null]}},\n\n \"b7_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"}, \"&denom\"] }},\n\n\n \n \"b8_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[8]\"]}, \n { \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b8_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[8]\"}, \"&denom\"] } \n },\n \"b8_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[8]\"}, \"&denom\"] } \n },\n 
\"b8_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[8]\"}, \"&denom\"] } \n },\n \"b8_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[8]\", \"&TCC_EA_RDREQ[8]\"]}, null] } },\n \"b8_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[8]\", \"&TCC_EA_WRREQ[8]\"]}, null] } },\n \"b8_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[8]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[8]\", \"&TCC_EA_ATOMIC[8]\"]}, null]}},\n\n \"b8_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"}, \"&denom\"] }},\n\n\n \n \"b9_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": 
[\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[9]\"]}, \n { \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b9_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[9]\"}, \"&denom\"] } \n },\n \"b9_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[9]\"}, \"&denom\"] } \n },\n \"b9_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[9]\"}, \"&denom\"] } \n },\n \"b9_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[9]\", \"&TCC_EA_RDREQ[9]\"]}, null] } },\n \"b9_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[9]\", \"&TCC_EA_WRREQ[9]\"]}, null] } },\n \"b9_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[9]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[9]\", \"&TCC_EA_ATOMIC[9]\"]}, null]}},\n\n \"b9_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"}, \"&denom\"] }},\n\n\n \n \"b10_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[10]\"]}, \n { \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b10_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[10]\"}, \"&denom\"] } \n },\n \"b10_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[10]\"}, \"&denom\"] } \n },\n \"b10_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[10]\"}, \"&denom\"] } \n },\n \"b10_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[10]\", \"&TCC_EA_RDREQ[10]\"]}, null] } },\n \"b10_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[10]\", \"&TCC_EA_WRREQ[10]\"]}, null] } },\n \"b10_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[10]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[10]\", \"&TCC_EA_ATOMIC[10]\"]}, null]}},\n\n \"b10_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"}, \"&denom\"] }},\n\n\n \n \"b11_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[11]\"]}, \n { \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b11_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[11]\"}, \"&denom\"] } \n },\n \"b11_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[11]\"}, \"&denom\"] } \n },\n \"b11_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[11]\"}, \"&denom\"] } \n },\n \"b11_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[11]\", \"&TCC_EA_RDREQ[11]\"]}, null] } },\n \"b11_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[11]\", 0]}, 
{\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[11]\", \"&TCC_EA_WRREQ[11]\"]}, null] } },\n \"b11_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[11]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[11]\", \"&TCC_EA_ATOMIC[11]\"]}, null]}},\n\n \"b11_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"}, \"&denom\"] }},\n\n\n \n \"b12_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[12]\"]}, \n { \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b12_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[12]\"}, \"&denom\"] } \n },\n \"b12_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[12]\"}, \"&denom\"] } \n },\n \"b12_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[12]\"}, \"&denom\"] } \n },\n \"b12_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[12]\", \"&TCC_EA_RDREQ[12]\"]}, null] } },\n \"b12_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[12]\", \"&TCC_EA_WRREQ[12]\"]}, null] } },\n \"b12_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[12]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[12]\", \"&TCC_EA_ATOMIC[12]\"]}, null]}},\n\n \"b12_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"}, \"&denom\"] }},\n\n\n \n \"b13_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[13]\"]}, \n { \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b13_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[13]\"}, \"&denom\"] } \n },\n \"b13_readReq\": 
{\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[13]\"}, \"&denom\"] } \n },\n \"b13_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[13]\"}, \"&denom\"] } \n },\n \"b13_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[13]\"}, \"&denom\"] } \n },\n \"b13_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[13]\"}, \"&denom\"] }\n },\n \"b13_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[13]\", \"&TCC_EA_RDREQ[13]\"]}, null] } },\n \"b13_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[13]\", \"&TCC_EA_WRREQ[13]\"]}, null] } },\n \"b13_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[13]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[13]\", \"&TCC_EA_ATOMIC[13]\"]}, null]}},\n\n \"b13_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"}, \"&denom\"] }},\n\n\n \n \"b14_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[14]\"]}, \n { \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b14_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[14]\"}, \"&denom\"] } \n },\n \"b14_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[14]\"}, \"&denom\"] } \n },\n \"b14_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[14]\"}, \"&denom\"] } \n },\n \"b14_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[14]\", \"&TCC_EA_RDREQ[14]\"]}, null] } },\n \"b14_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[14]\", \"&TCC_EA_WRREQ[14]\"]}, null] } },\n \"b14_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[14]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[14]\", \"&TCC_EA_ATOMIC[14]\"]}, null]}},\n\n \"b14_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_io_credit\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"}, \"&denom\"] }},\n\n\n \n \"b15_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[15]\"]}, \n { \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b15_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[15]\"}, \"&denom\"] } \n },\n \"b15_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[15]\"}, \"&denom\"] } \n },\n \"b15_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[15]\"}, \"&denom\"] } \n },\n \"b15_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[15]\"}, \"&denom\"] } \n },\n \"b15_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[15]\"}, \"&denom\"] }\n },\n \"b15_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[15]\", \"&TCC_EA_RDREQ[15]\"]}, null] } },\n \"b15_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[15]\", \"&TCC_EA_WRREQ[15]\"]}, null] } },\n \"b15_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[15]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[15]\", \"&TCC_EA_ATOMIC[15]\"]}, null]}},\n\n 
\"b15_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"}, \"&denom\"] }}\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"0\",\n \"Hit Rate\": \"&b0_hitRate\",\n \"Req\": \"&b0_req\",\n \"Read Req\": \"&b0_readReq\",\n \"Write Req\": \"&b0_writeReq\",\n \"AtomicReq\": \"&b0_atomicReq\",\n \"EA Read Req\": \"&b0_eaReadReq\",\n \"EA Write Req\": \"&b0_eaWriteReq\",\n \"EA AtomicReq\": \"&b0_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b0_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b0_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b0_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b0_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b0_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b0_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b0_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b0_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b0_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b0_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n },\n {\n \"Channel\": \"1\",\n \"Hit Rate\": \"&b1_hitRate\",\n \"Req\": \"&b1_req\",\n \"Read Req\": \"&b1_readReq\",\n 
\"Write Req\": \"&b1_writeReq\",\n \"AtomicReq\": \"&b1_atomicReq\",\n \"EA Read Req\": \"&b1_eaReadReq\",\n \"EA Write Req\": \"&b1_eaWriteReq\",\n \"EA AtomicReq\": \"&b1_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b1_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b1_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b1_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b1_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b1_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b1_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b1_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b1_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b1_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b1_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n },\n {\n \"Channel\": \"2\",\n \"Hit Rate\": \"&b2_hitRate\",\n \"Req\": \"&b2_req\",\n \"Read Req\": \"&b2_readReq\",\n \"Write Req\": \"&b2_writeReq\",\n \"AtomicReq\": \"&b2_atomicReq\",\n \"EA Read Req\": \"&b2_eaReadReq\",\n \"EA Write Req\": \"&b2_eaWriteReq\",\n \"EA AtomicReq\": \"&b2_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b2_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b2_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b2_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b2_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b2_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b2_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b2_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b2_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b2_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b2_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"3\",\n \"Hit Rate\": \"&b3_hitRate\",\n \"Req\": \"&b3_req\",\n \"Read Req\": \"&b3_readReq\",\n \"Write Req\": \"&b3_writeReq\",\n \"AtomicReq\": \"&b3_atomicReq\",\n \"EA Read Req\": \"&b3_eaReadReq\",\n \"EA Write Req\": \"&b3_eaWriteReq\",\n \"EA 
AtomicReq\": \"&b3_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b3_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b3_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b3_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b3_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b3_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b3_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b3_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b3_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b3_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b3_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"4\",\n \"Hit Rate\": \"&b4_hitRate\",\n \"Req\": \"&b4_req\",\n \"Read Req\": \"&b4_readReq\",\n \"Write Req\": \"&b4_writeReq\",\n \"AtomicReq\": \"&b4_atomicReq\",\n \"EA Read Req\": \"&b4_eaReadReq\",\n \"EA Write Req\": \"&b4_eaWriteReq\",\n \"EA AtomicReq\": \"&b4_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b4_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b4_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b4_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b4_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b4_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b4_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b4_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b4_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b4_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b4_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"5\",\n \"Hit Rate\": \"&b5_hitRate\",\n \"Req\": \"&b5_req\",\n \"Read Req\": \"&b5_readReq\",\n \"Write Req\": \"&b5_writeReq\",\n \"AtomicReq\": \"&b5_atomicReq\",\n \"EA Read Req\": \"&b5_eaReadReq\",\n \"EA Write Req\": \"&b5_eaWriteReq\",\n \"EA AtomicReq\": \"&b5_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b5_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b5_eaWriteLat\",\n \"EA Atomic Lat - cycles\": 
\"&b5_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b5_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b5_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b5_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b5_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b5_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b5_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b5_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"6\",\n \"Hit Rate\": \"&b6_hitRate\",\n \"Req\": \"&b6_req\",\n \"Read Req\": \"&b6_readReq\",\n \"Write Req\": \"&b6_writeReq\",\n \"AtomicReq\": \"&b6_atomicReq\",\n \"EA Read Req\": \"&b6_eaReadReq\",\n \"EA Write Req\": \"&b6_eaWriteReq\",\n \"EA AtomicReq\": \"&b6_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b6_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b6_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b6_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b6_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b6_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b6_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b6_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b6_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b6_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b6_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"7\",\n \"Hit Rate\": \"&b7_hitRate\",\n \"Req\": \"&b7_req\",\n \"Read Req\": \"&b7_readReq\",\n \"Write Req\": \"&b7_writeReq\",\n \"AtomicReq\": \"&b7_atomicReq\",\n \"EA Read Req\": \"&b7_eaReadReq\",\n \"EA Write Req\": \"&b7_eaWriteReq\",\n \"EA AtomicReq\": \"&b7_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b7_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b7_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b7_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b7_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b7_ea_read_stall_gmi_credit\",\n \"EA Read Stall 
- DRAM\": \"&b7_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b7_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b7_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b7_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b7_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"8\",\n \"Hit Rate\": \"&b8_hitRate\",\n \"Req\": \"&b8_req\",\n \"Read Req\": \"&b8_readReq\",\n \"Write Req\": \"&b8_writeReq\",\n \"AtomicReq\": \"&b8_atomicReq\",\n \"EA Read Req\": \"&b8_eaReadReq\",\n \"EA Write Req\": \"&b8_eaWriteReq\",\n \"EA AtomicReq\": \"&b8_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b8_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b8_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b8_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b8_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b8_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b8_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b8_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b8_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b8_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b8_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"9\",\n \"Hit Rate\": \"&b9_hitRate\",\n \"Req\": \"&b9_req\",\n \"Read Req\": \"&b9_readReq\",\n \"Write Req\": \"&b9_writeReq\",\n \"AtomicReq\": \"&b9_atomicReq\",\n \"EA Read Req\": \"&b9_eaReadReq\",\n \"EA Write Req\": \"&b9_eaWriteReq\",\n \"EA AtomicReq\": \"&b9_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b9_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b9_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b9_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b9_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b9_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b9_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b9_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": 
\"&b9_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b9_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b9_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"10\",\n \"Hit Rate\": \"&b10_hitRate\",\n \"Req\": \"&b10_req\",\n \"Read Req\": \"&b10_readReq\",\n \"Write Req\": \"&b10_writeReq\",\n \"AtomicReq\": \"&b10_atomicReq\",\n \"EA Read Req\": \"&b10_eaReadReq\",\n \"EA Write Req\": \"&b10_eaWriteReq\",\n \"EA AtomicReq\": \"&b10_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b10_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b10_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b10_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b10_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b10_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b10_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b10_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b10_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b10_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b10_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"11\",\n \"Hit Rate\": \"&b11_hitRate\",\n \"Req\": \"&b11_req\",\n \"Read Req\": \"&b11_readReq\",\n \"Write Req\": \"&b11_writeReq\",\n \"AtomicReq\": \"&b11_atomicReq\",\n \"EA Read Req\": \"&b11_eaReadReq\",\n \"EA Write Req\": \"&b11_eaWriteReq\",\n \"EA AtomicReq\": \"&b11_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b11_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b11_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b11_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b11_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b11_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b11_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b11_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b11_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b11_ea_write_stall_dram_credit\",\n \"EA Write Stall - 
Starve\": \"&b11_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"12\",\n \"Hit Rate\": \"&b12_hitRate\",\n \"Req\": \"&b12_req\",\n \"Read Req\": \"&b12_readReq\",\n \"Write Req\": \"&b12_writeReq\",\n \"AtomicReq\": \"&b12_atomicReq\",\n \"EA Read Req\": \"&b12_eaReadReq\",\n \"EA Write Req\": \"&b12_eaWriteReq\",\n \"EA AtomicReq\": \"&b12_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b12_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b12_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b12_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b12_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b12_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b12_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b12_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b12_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b12_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b12_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"13\",\n \"Hit Rate\": \"&b13_hitRate\",\n \"Req\": \"&b13_req\",\n \"Read Req\": \"&b13_readReq\",\n \"Write Req\": \"&b13_writeReq\",\n \"AtomicReq\": \"&b13_atomicReq\",\n \"EA Read Req\": \"&b13_eaReadReq\",\n \"EA Write Req\": \"&b13_eaWriteReq\",\n \"EA AtomicReq\": \"&b13_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b13_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b13_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b13_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b13_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b13_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b13_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b13_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b13_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b13_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b13_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"14\",\n \"Hit Rate\": 
\"&b14_hitRate\",\n \"Req\": \"&b14_req\",\n \"Read Req\": \"&b14_readReq\",\n \"Write Req\": \"&b14_writeReq\",\n \"AtomicReq\": \"&b14_atomicReq\",\n \"EA Read Req\": \"&b14_eaReadReq\",\n \"EA Write Req\": \"&b14_eaWriteReq\",\n \"EA AtomicReq\": \"&b14_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b14_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b14_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b14_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b14_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b14_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b14_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b14_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b14_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b14_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b14_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"15\",\n \"Hit Rate\": \"&b15_hitRate\",\n \"Req\": \"&b15_req\",\n \"Read Req\": \"&b15_readReq\",\n \"Write Req\": \"&b15_writeReq\",\n \"AtomicReq\": \"&b15_atomicReq\",\n \"EA Read Req\": \"&b15_eaReadReq\",\n \"EA Write Req\": \"&b15_eaWriteReq\",\n \"EA AtomicReq\": \"&b15_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b15_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b15_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b15_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b15_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b15_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b15_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b15_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b15_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b15_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b15_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA 
Read Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": 
false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 73 + }, + "id": 70, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b16_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[16]\"]}, \n { \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b16_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[16]\"}, \"&denom\"] } \n },\n \"b16_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[16]\"}, \"&denom\"] } \n },\n \"b16_writeReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_WRITE[16]\"}, \"&denom\"] } \n },\n \"b16_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[16]\"}, \"&denom\"] } \n },\n \"b16_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[16]\"}, \"&denom\"] }\n },\n \"b16_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[16]\"}, \"&denom\"] } \n },\n \"b16_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[16]\"}, \"&denom\"] } \n },\n\n \"b16_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[16]\", \"&TCC_EA_RDREQ[16]\"]}, null] } },\n \"b16_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[16]\", \"&TCC_EA_WRREQ[16]\"]}, null] } },\n \"b16_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[16]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[16]\", \"&TCC_EA_ATOMIC[16]\"]}, null]}},\n \"b16_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"}, \"&denom\"] }},\n\n \n \"b17_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": 
[\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[17]\"]}, \n { \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b17_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[17]\"}, \"&denom\"] } \n },\n \"b17_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[17]\"}, \"&denom\"] } \n },\n \"b17_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[17]\"}, \"&denom\"] } \n },\n \"b17_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[17]\"}, \"&denom\"] }\n },\n \"b17_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[17]\"}, \"&denom\"] } \n },\n \"b17_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[17]\", \"&TCC_EA_RDREQ[17]\"]}, null] } },\n \"b17_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[17]\", \"&TCC_EA_WRREQ[17]\"]}, null] } },\n \"b17_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[17]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[17]\", \"&TCC_EA_ATOMIC[17]\"]}, null]}},\n \"b17_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_gmi_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"}, \"&denom\"] }},\n\n \n \"b18_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[18]\"]}, \n { \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b18_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[18]\"}, \"&denom\"] }\n },\n \"b18_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[18]\"}, \"&denom\"] } \n },\n \"b18_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[18]\"}, \"&denom\"] }\n },\n \"b18_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[18]\", \"&TCC_EA_RDREQ[18]\"]}, null] } },\n \"b18_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[18]\", \"&TCC_EA_WRREQ[18]\"]}, null] } },\n \"b18_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[18]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[18]\", \"&TCC_EA_ATOMIC[18]\"]}, null]}},\n \"b18_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n 
\"b18_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"}, \"&denom\"] }},\n\n \n \"b19_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[19]\"]}, \n { \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b19_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[19]\"}, \"&denom\"] } \n },\n \"b19_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[19]\"}, \"&denom\"] } \n },\n \"b19_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[19]\"}, \"&denom\"] }\n },\n \"b19_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[19]\"}, \"&denom\"] }\n },\n \"b19_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[19]\"}, \"&denom\"] } \n },\n \"b19_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[19]\", \"&TCC_EA_RDREQ[19]\"]}, null] } },\n \"b19_eaWriteLat\": { \"$avg\": {\"$cond\": 
[{\"$ne\": [\"&TCC_EA_WRREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[19]\", \"&TCC_EA_WRREQ[19]\"]}, null] } },\n \"b19_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[19]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[19]\", \"&TCC_EA_ATOMIC[19]\"]}, null]}},\n \"b19_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"}, \"&denom\"] }},\n\n \n \"b20_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[20]\"]}, \n { \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b20_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[20]\"}, \"&denom\"] } \n },\n \"b20_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[20]\"}, \"&denom\"] } \n },\n \"b20_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[20]\"}, \"&denom\"] }\n },\n \"b20_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[20]\"}, \"&denom\"] } \n },\n 
\"b20_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[20]\", \"&TCC_EA_RDREQ[20]\"]}, null] } },\n \"b20_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[20]\", \"&TCC_EA_WRREQ[20]\"]}, null] } },\n \"b20_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[20]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[20]\", \"&TCC_EA_ATOMIC[20]\"]}, null]}},\n \"b20_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"}, \"&denom\"] }},\n\n \n\n \"b21_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[21]\"]}, \n { \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b21_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[21]\"}, 
\"&denom\"] } \n },\n \"b21_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[21]\"}, \"&denom\"] } \n },\n \"b21_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[21]\"}, \"&denom\"] } \n },\n \"b21_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[21]\", \"&TCC_EA_RDREQ[21]\"]}, null] } },\n \"b21_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[21]\", \"&TCC_EA_WRREQ[21]\"]}, null] } },\n \"b21_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[21]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[21]\", \"&TCC_EA_ATOMIC[21]\"]}, null]}},\n \"b21_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_too_many\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"}, \"&denom\"] }},\n\n \n\n \"b22_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[22]\"]}, \n { \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b22_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[22]\"}, \"&denom\"] } \n },\n \"b22_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[22]\"}, \"&denom\"] } \n },\n \"b22_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[22]\"}, \"&denom\"] } \n },\n \"b22_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[22]\"}, \"&denom\"] } \n },\n \"b22_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[22]\"}, \"&denom\"] }\n },\n \"b22_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[22]\", \"&TCC_EA_RDREQ[22]\"]}, null] } },\n \"b22_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[22]\", \"&TCC_EA_WRREQ[22]\"]}, null] } },\n \"b22_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[22]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[22]\", \"&TCC_EA_ATOMIC[22]\"]}, null]}},\n \"b22_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n 
\"b22_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"}, \"&denom\"] }},\n\n \n\n \"b23_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[23]\"]}, \n { \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b23_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[23]\"}, \"&denom\"] } \n },\n \"b23_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[23]\"}, \"&denom\"] } \n },\n \"b23_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[23]\"}, \"&denom\"] } \n },\n \"b23_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[23]\"}, \"&denom\"] } \n },\n \"b23_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[23]\"}, \"&denom\"] }\n },\n \"b23_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[23]\", \"&TCC_EA_RDREQ[23]\"]}, null] } },\n \"b23_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[23]\", \"&TCC_EA_WRREQ[23]\"]}, null] } },\n \"b23_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[23]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[23]\", 
\"&TCC_EA_ATOMIC[23]\"]}, null]}},\n \"b23_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"}, \"&denom\"] }},\n\n \n \"b24_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[24]\"]}, \n { \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b24_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[24]\"}, \"&denom\"] } \n },\n \"b24_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[24]\"}, \"&denom\"] } \n },\n \"b24_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[24]\"}, \"&denom\"] } \n },\n \"b24_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadLat\": { \"$avg\": 
{\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[24]\", \"&TCC_EA_RDREQ[24]\"]}, null] } },\n \"b24_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[24]\", \"&TCC_EA_WRREQ[24]\"]}, null] } },\n \"b24_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[24]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[24]\", \"&TCC_EA_ATOMIC[24]\"]}, null]}},\n \"b24_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"}, \"&denom\"] }},\n\n \n \"b25_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[25]\"]}, \n { \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b25_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[25]\"}, \"&denom\"] } \n },\n \"b25_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[25]\"}, \"&denom\"] } \n },\n \"b25_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[25]\"}, \"&denom\"] } \n },\n \"b25_atomicReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[25]\", \"&TCC_EA_RDREQ[25]\"]}, null] } },\n \"b25_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[25]\", \"&TCC_EA_WRREQ[25]\"]}, null] } },\n \"b25_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[25]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[25]\", \"&TCC_EA_ATOMIC[25]\"]}, null]}},\n \"b25_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"}, \"&denom\"] }},\n\n \n \"b26_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[26]\"]}, \n { \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b26_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[26]\"}, \"&denom\"] } \n },\n \"b26_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[26]\"}, \"&denom\"] } \n },\n \"b26_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[26]\"}, \"&denom\"] } \n },\n \"b26_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[26]\", \"&TCC_EA_RDREQ[26]\"]}, null] } },\n \"b26_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[26]\", \"&TCC_EA_WRREQ[26]\"]}, null] } },\n \"b26_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[26]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[26]\", \"&TCC_EA_ATOMIC[26]\"]}, null]}},\n \"b26_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n 
\"b26_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"}, \"&denom\"] }},\n\n \n \"b27_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[27]\"]}, \n { \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b27_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[27]\"}, \"&denom\"] } \n },\n \"b27_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[27]\"}, \"&denom\"] } \n },\n \"b27_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[27]\"}, \"&denom\"] } \n },\n \"b27_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[27]\", \"&TCC_EA_RDREQ[27]\"]}, null] } },\n \"b27_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[27]\", \"&TCC_EA_WRREQ[27]\"]}, null] } },\n \"b27_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[27]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[27]\", \"&TCC_EA_ATOMIC[27]\"]}, null]}},\n \"b27_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"}, \"&denom\"] }},\n\n \n \"b28_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[28]\"]}, \n { \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b28_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[28]\"}, \"&denom\"] } \n },\n \"b28_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[28]\"}, \"&denom\"] } \n },\n \"b28_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[28]\"}, \"&denom\"] } \n },\n \"b28_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[28]\", \"&TCC_EA_RDREQ[28]\"]}, null] } },\n \"b28_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[28]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[28]\", \"&TCC_EA_WRREQ[28]\"]}, null] } },\n \"b28_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[28]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[28]\", \"&TCC_EA_ATOMIC[28]\"]}, null]}},\n \"b28_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"}, \"&denom\"] }},\n\n \n \"b29_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[29]\"]}, \n { \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b29_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[29]\"}, \"&denom\"] } \n },\n \"b29_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[29]\"}, \"&denom\"] } \n },\n \"b29_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[29]\"}, \"&denom\"] } \n },\n \"b29_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[29]\"}, \"&denom\"] } \n },\n \"b29_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[29]\"}, \"&denom\"] }\n },\n \"b29_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[29]\", \"&TCC_EA_RDREQ[29]\"]}, null] } },\n \"b29_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[29]\", \"&TCC_EA_WRREQ[29]\"]}, null] } },\n \"b29_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[29]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[29]\", \"&TCC_EA_ATOMIC[29]\"]}, null]}},\n \"b29_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"}, \"&denom\"] }},\n\n \n \"b30_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[30]\"]}, \n { \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b30_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[30]\"}, \"&denom\"] } \n },\n \"b30_readReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[30]\"}, \"&denom\"] } \n },\n \"b30_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[30]\"}, \"&denom\"] } \n },\n \"b30_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[30]\", \"&TCC_EA_RDREQ[30]\"]}, null] } },\n \"b30_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[30]\", \"&TCC_EA_WRREQ[30]\"]}, null] } },\n \"b30_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[30]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[30]\", \"&TCC_EA_ATOMIC[30]\"]}, null]}},\n \"b30_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"}, \"&denom\"] }},\n\n \n \"b31_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[31]\"]}, \n { \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b31_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[31]\"}, \"&denom\"] } \n },\n \"b31_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[31]\"}, \"&denom\"] } \n },\n \"b31_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[31]\"}, \"&denom\"] } \n },\n \"b31_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[31]\"}, \"&denom\"] } \n },\n \"b31_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}, \"&denom\"] }\n },\n \"b31_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[31]\", \"&TCC_EA_RDREQ[31]\"]}, null] } },\n \"b31_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[31]\", \"&TCC_EA_WRREQ[31]\"]}, null] } },\n \"b31_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[31]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[31]\", \"&TCC_EA_ATOMIC[31]\"]}, null]}},\n \"b31_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}, \"&denom\"] }}\n\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"16\",\n \"Hit Rate\": \"&b16_hitRate\",\n \"Req\": \"&b16_req\",\n \"Read Req\": \"&b16_readReq\",\n \"Write Req\": \"&b16_writeReq\",\n \"AtomicReq\": \"&b16_atomicReq\",\n \"EA Read Req\": \"&b16_eaReadReq\",\n \"EA Write Req\": \"&b16_eaWriteReq\",\n \"EA AtomicReq\": \"&b16_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b16_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b16_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b16_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b16_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b16_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b16_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b16_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b16_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b16_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b16_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"17\",\n \"Hit Rate\": \"&b17_hitRate\",\n \"Req\": \"&b17_req\",\n \"Read Req\": \"&b17_readReq\",\n \"Write Req\": \"&b17_writeReq\",\n \"AtomicReq\": \"&b17_atomicReq\",\n \"EA Read Req\": \"&b17_eaReadReq\",\n \"EA Write Req\": \"&b17_eaWriteReq\",\n \"EA AtomicReq\": \"&b17_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b17_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b17_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b17_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b17_ea_read_stall_io_credit\",\n \"EA Read 
Stall - GMI\": \"&b17_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b17_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b17_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b17_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b17_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b17_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"18\",\n \"Hit Rate\": \"&b18_hitRate\",\n \"Req\": \"&b18_req\",\n \"Read Req\": \"&b18_readReq\",\n \"Write Req\": \"&b18_writeReq\",\n \"AtomicReq\": \"&b18_atomicReq\",\n \"EA Read Req\": \"&b18_eaReadReq\",\n \"EA Write Req\": \"&b18_eaWriteReq\",\n \"EA AtomicReq\": \"&b18_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b18_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b18_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b18_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b18_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b18_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b18_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b18_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b18_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b18_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b18_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"19\",\n \"Hit Rate\": \"&b19_hitRate\",\n \"Req\": \"&b19_req\",\n \"Read Req\": \"&b19_readReq\",\n \"Write Req\": \"&b19_writeReq\",\n \"AtomicReq\": \"&b19_atomicReq\",\n \"EA Read Req\": \"&b19_eaReadReq\",\n \"EA Write Req\": \"&b19_eaWriteReq\",\n \"EA AtomicReq\": \"&b19_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b19_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b19_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b19_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b19_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b19_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b19_ea_read_stall_dram_credit\",\n \"EA 
Write Stall - IO\": \"&b19_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b19_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b19_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b19_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"20\",\n \"Hit Rate\": \"&b20_hitRate\",\n \"Req\": \"&b20_req\",\n \"Read Req\": \"&b20_readReq\",\n \"Write Req\": \"&b20_writeReq\",\n \"AtomicReq\": \"&b20_atomicReq\",\n \"EA Read Req\": \"&b20_eaReadReq\",\n \"EA Write Req\": \"&b20_eaWriteReq\",\n \"EA AtomicReq\": \"&b20_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b20_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b20_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b20_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b20_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b20_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b20_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b20_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b20_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b20_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b20_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"21\",\n \"Hit Rate\": \"&b21_hitRate\",\n \"Req\": \"&b21_req\",\n \"Read Req\": \"&b21_readReq\",\n \"Write Req\": \"&b21_writeReq\",\n \"AtomicReq\": \"&b21_atomicReq\",\n \"EA Read Req\": \"&b21_eaReadReq\",\n \"EA Write Req\": \"&b21_eaWriteReq\",\n \"EA AtomicReq\": \"&b21_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b21_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b21_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b21_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b21_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b21_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b21_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b21_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b21_ea_write_stall_gmi_credit\",\n 
\"EA Write Stall - DRAM\": \"&b21_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b21_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"22\",\n \"Hit Rate\": \"&b22_hitRate\",\n \"Req\": \"&b22_req\",\n \"Read Req\": \"&b22_readReq\",\n \"Write Req\": \"&b22_writeReq\",\n \"AtomicReq\": \"&b22_atomicReq\",\n \"EA Read Req\": \"&b22_eaReadReq\",\n \"EA Write Req\": \"&b22_eaWriteReq\",\n \"EA AtomicReq\": \"&b22_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b22_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b22_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b22_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b22_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b22_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b22_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b22_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b22_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b22_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b22_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"23\",\n \"Hit Rate\": \"&b23_hitRate\",\n \"Req\": \"&b23_req\",\n \"Read Req\": \"&b23_readReq\",\n \"Write Req\": \"&b23_writeReq\",\n \"AtomicReq\": \"&b23_atomicReq\",\n \"EA Read Req\": \"&b23_eaReadReq\",\n \"EA Write Req\": \"&b23_eaWriteReq\",\n \"EA AtomicReq\": \"&b23_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b23_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b23_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b23_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b23_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b23_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b23_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b23_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b23_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b23_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": 
\"&b23_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"24\",\n \"Hit Rate\": \"&b24_hitRate\",\n \"Req\": \"&b24_req\",\n \"Read Req\": \"&b24_readReq\",\n \"Write Req\": \"&b24_writeReq\",\n \"AtomicReq\": \"&b24_atomicReq\",\n \"EA Read Req\": \"&b24_eaReadReq\",\n \"EA Write Req\": \"&b24_eaWriteReq\",\n \"EA AtomicReq\": \"&b24_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b24_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b24_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b24_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b24_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b24_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b24_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b24_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b24_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b24_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b24_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"25\",\n \"Hit Rate\": \"&b25_hitRate\",\n \"Req\": \"&b25_req\",\n \"Read Req\": \"&b25_readReq\",\n \"Write Req\": \"&b25_writeReq\",\n \"AtomicReq\": \"&b25_atomicReq\",\n \"EA Read Req\": \"&b25_eaReadReq\",\n \"EA Write Req\": \"&b25_eaWriteReq\",\n \"EA AtomicReq\": \"&b25_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b25_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b25_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b25_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b25_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b25_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b25_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b25_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b25_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b25_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b25_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"26\",\n \"Hit Rate\": 
\"&b26_hitRate\",\n \"Req\": \"&b26_req\",\n \"Read Req\": \"&b26_readReq\",\n \"Write Req\": \"&b26_writeReq\",\n \"AtomicReq\": \"&b26_atomicReq\",\n \"EA Read Req\": \"&b26_eaReadReq\",\n \"EA Write Req\": \"&b26_eaWriteReq\",\n \"EA AtomicReq\": \"&b26_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b26_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b26_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b26_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b26_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b26_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b26_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b26_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b26_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b26_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b26_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"27\",\n \"Hit Rate\": \"&b27_hitRate\",\n \"Req\": \"&b27_req\",\n \"Read Req\": \"&b27_readReq\",\n \"Write Req\": \"&b27_writeReq\",\n \"AtomicReq\": \"&b27_atomicReq\",\n \"EA Read Req\": \"&b27_eaReadReq\",\n \"EA Write Req\": \"&b27_eaWriteReq\",\n \"EA AtomicReq\": \"&b27_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b27_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b27_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b27_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b27_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b27_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b27_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b27_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b27_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b27_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b27_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"28\",\n \"Hit Rate\": \"&b28_hitRate\",\n \"Req\": \"&b28_req\",\n \"Read Req\": \"&b28_readReq\",\n \"Write Req\": \"&b28_writeReq\",\n 
\"AtomicReq\": \"&b28_atomicReq\",\n \"EA Read Req\": \"&b28_eaReadReq\",\n \"EA Write Req\": \"&b28_eaWriteReq\",\n \"EA AtomicReq\": \"&b28_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b28_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b28_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b28_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b28_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b28_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b28_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b28_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b28_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b28_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b28_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"29\",\n \"Hit Rate\": \"&b29_hitRate\",\n \"Req\": \"&b29_req\",\n \"Read Req\": \"&b29_readReq\",\n \"Write Req\": \"&b29_writeReq\",\n \"AtomicReq\": \"&b29_atomicReq\",\n \"EA Read Req\": \"&b29_eaReadReq\",\n \"EA Write Req\": \"&b29_eaWriteReq\",\n \"EA AtomicReq\": \"&b29_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b29_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b29_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b29_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b29_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b29_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b29_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b29_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b29_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b29_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b29_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"30\",\n \"Hit Rate\": \"&b30_hitRate\",\n \"Req\": \"&b30_req\",\n \"Read Req\": \"&b30_readReq\",\n \"Write Req\": \"&b30_writeReq\",\n \"AtomicReq\": \"&b30_atomicReq\",\n \"EA Read Req\": \"&b30_eaReadReq\",\n \"EA Write Req\": \"&b30_eaWriteReq\",\n \"EA 
AtomicReq\": \"&b30_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b30_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b30_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b30_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b30_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b30_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b30_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b30_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b30_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b30_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b30_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"31\",\n \"Hit Rate\": \"&b31_hitRate\",\n \"Req\": \"&b31_req\",\n \"Read Req\": \"&b31_readReq\",\n \"Write Req\": \"&b31_writeReq\",\n \"AtomicReq\": \"&b31_atomicReq\",\n \"EA Read Req\": \"&b31_eaReadReq\",\n \"EA Write Req\": \"&b31_eaWriteReq\",\n \"EA AtomicReq\": \"&b31_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b31_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b31_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b31_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b31_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b31_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b31_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b31_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b31_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b31_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b31_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 16-31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": 
"YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + 
"TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 81 + }, + "id": 93, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + 
"LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 81 + }, + "id": 94, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + 
"GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + 
"$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 89 + }, + "id": 187, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + 
"MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 89 + }, + "id": 201, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": 
"#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", 
+ "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 97 + }, + "id": 220, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + 
"RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 97 + }, + "id": 227, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": 
"#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 221, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + 
"panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": 
true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 105 + }, + "id": 228, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + 
"LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 222, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + 
"BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + 
"SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 113 + }, + "id": 229, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": 
"teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 121 + }, + "id": 223, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + 
"DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + 
"ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 121 + }, + "id": 230, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 
50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 129 + }, + "id": 225, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + 
"GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", 
+ "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 129 + }, + "id": 231, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + 
"OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 137 + }, + "id": 224, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": 
"rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 12, + "y": 137 + }, + "id": 232, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + 
"ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 145 + }, + "id": 226, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + 
"HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 145 + }, + "id": 233, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + 
], + "title": "L2 - EA Write Starve (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache (per Channel)", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 34, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + "hide": 0, + "includeAll": false, + "label": "Normalization", + "multi": false, + "name": "normUnit", + "options": [ + { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + { + "selected": false, + "text": "\"per Cycle\"", + "value": "\"per Cycle\"" + }, + { + "selected": false, + "text": "\"per Sec\"", + "value": "\"per Sec\"" + }, + { + "selected": false, + "text": "\"per Kernel\"", + "value": "\"per Kernel\"" + } + ], + "query": "\"per Wave\",\n\"per Cycle\",\n\"per Sec\",\n\"per Kernel\"", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "L2 Channels", + "multi": false, + "name": "L2Banks", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SEs", + "multi": false, + "name": "numSE", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + 
"skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "104", + "value": "104" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#CUs", + "multi": false, + "name": "numCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SCLK (MHz)", + "multi": false, + "name": "sclk", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SQC", + "multi": false, + "name": "numSQC", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": 
{ + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "HBM BW (GB/s)", + "multi": false, + "name": "hbmBW", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "rocprofiler-compute_asw_mix_mi200", + "value": "rocprofiler-compute_asw_mix_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Workload", + "multi": false, + "name": "Workload1", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "433323", + "value": "433323" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Kernel Cycles", + "multi": false, + "name": "kernelBusyCycles", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": 
{ \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "103", + "value": "103" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Active CUs", + "multi": false, + "name": "numActiveCUs", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n 
]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Filtered Dispatch ID", + "multi": false, + "name": "DispatchIDFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. 
(1|18)", + "hide": 0, + "label": "Dispatch Filter", + "name": "DispatchID", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "GCD", + "multi": false, + "name": "gpuFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Kernels", + "multi": true, + "name": "KernelNameFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "miperf_asw_vcopy_mi200", + "value": "miperf_asw_vcopy_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline Workload", + "multi": false, + "name": "Workload2", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "84", + "value": "84" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { 
\"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline ActiveCUs", + "multi": false, + "name": "numActiveCUs2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Baseline Dispatch IDs", + "multi": false, + "name": "DispatchIDFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID2:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. (1|18)", + "hide": 0, + "label": "Baseline Dispatch Filter", + "name": "DispatchID2", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline GCD", + "multi": false, + "name": "gpuFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Baseline Kernels", + "multi": true, + "name": "KernelNameFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": 
\"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "System Info" + ], + "value": [ + "System Info" + ] + }, + "hide": 0, + "includeAll": false, + "label": "Comparison Panels", + "multi": true, + "name": "select", + "options": [ + { + "selected": true, + "text": "System Info", + "value": "System Info" + }, + { + "selected": false, + "text": "System Speed-of-Light", + "value": "System Speed-of-Light" + }, + { + "selected": false, + "text": "Roofline", + "value": "Roofline" + }, + { + "selected": false, + "text": "Command Processor", + "value": "Command Processor" + }, + { + "selected": false, + "text": "Shader Processor Input", + "value": "Shader Processor Input" + }, + { + "selected": false, + "text": "Wavefront", + "value": "Wavefront" + }, + { + "selected": false, + "text": "Compute Pipeline", + "value": "Compute Pipeline" + }, + { + "selected": false, + "text": "Instruction Mix", + "value": "Instruction Mix" + }, + { + "selected": false, + "text": "Local Data Share", + "value": "Local Data Share" + }, + { + "selected": false, + "text": "Instruction Cache", + "value": "Instruction Cache" + }, + { + "selected": false, + "text": "Scalar L1D Cache", + "value": "Scalar L1D Cache" + }, + { + "selected": false, + "text": "Texture Addr and Data", + "value": "Texture Addr and Data" + }, + { + "selected": false, + "text": "Vector L1D Cache", + "value": "Vector L1D Cache" + }, + { + "selected": false, + "text": "L2 Cache", + "value": "L2 Cache" + } + ], + "query": "System Info, \nSystem Speed-of-Light, \nRoofline,\nCommand Processor, \nShader Processor Input, \nWavefront,\nCompute Pipeline, \nInstruction Mix,\nLocal Data Share, \nInstruction Cache, \nScalar L1D Cache, \nTexture Addr and Data, \nVector L1D Cache,\nL2 Cache", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + 
"value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline L2 Channels", + "multi": false, + "name": "L2Banks2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SEs", + "multi": false, + "name": "numSE2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "110", + "value": "110" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #CUs", + "multi": false, + "name": "numCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + 
"definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline SCLK (MHz)", + "multi": false, + "name": "sclk2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SQC", + "multi": false, + "name": "numSQC2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline HBM BW (GB/s)", + "multi": false, + "name": "hbmBW2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "mi200", + "value": "mi200" + }, + "definition": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SOC", + "multi": false, + "name": "soc", + "options": [], + "query": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "5", + "value": "5" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": 
"TopN", + "options": [ + { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "20", + "value": "20" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "query": "1,5,10,15,20,50,100", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2021-11-04T14:21:39.749Z", + "to": "2021-11-08T14:21:39.749Z" + }, + "timepicker": {}, + "timezone": "", + "title": "rocprofiler-compute_v1.0.7_pub", + "uid": "MIPerf_v1_0_06302022112", + "version": 2, + "weekStart": "" +} diff --git a/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.8_pub.json b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.8_pub.json new file mode 100644 index 0000000000..760081cf13 --- /dev/null +++ b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v1.0.8_pub.json @@ -0,0 +1,13397 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "iteration": 1692036465764, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 217, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" 
+ }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 23, + "w": 13, + "x": 0, + "y": 1 + }, + "id": 159, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.sysinfo.aggregate([\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"command\": 1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"App Command\",\n \"Value\": \"&command\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max 
Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.sysinfo.aggregate([\n {\"$match\": {\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(System Info)\"}}\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"date\":1,\n \"command\": 1,\n \"host_name\": 1,\n \"host_cpu\": 1,\n \"host_distro\": 1,\n \"host_kernel\": 1,\n \"host_rocmver\": 1,\n \"gpu_soc\": 1,\n \"name\": 1,\n \"numSE\": 1,\n \"numSQC\": 1,\n \"numCU\": 1,\n \"numSIMD\": 1,\n \"waveSize\": 1,\n \"maxWavesPerCU\": 1,\n \"maxWorkgroupSize\":1,\n \"L1\":1,\n \"L2\":1,\n \"L2Banks\": 1,\n \"sclk\":1,\n \"mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbmBW\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&date\"\n },\n {\n \"Metric\":\"App Command\",\n \"Value\": \"&command\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&host_name\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&host_cpu\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&host_distro\"\n },\n {\n \"Metric\":\"Host 
Kernel\",\n \"Value\": \"&host_kernel\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&host_rocmver\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&name\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_soc\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&numSE\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&numSQC\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&numCU\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&numSIMD\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&maxWavesPerCU\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&maxWorkgroupSize\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&L1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&L2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&L2Banks\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbmBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "System Info", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true + }, + "indexByName": {}, + "renameByName": { + "Value 1": "Current", + "Value 2": "Baseline" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Info", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, 
+ "y": 1 + }, + "id": 108, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "decimals": 0, + "links": [], + "mappings": [ + { + "options": { + "match": "false", + "result": { + "index": 0 + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text" + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Percent of Peak - PoP" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.width", + "value": 252 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit 1" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 137 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 125 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 161 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 15, + "x": 0, + "y": 2 + }, + "id": 110, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + 
"sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] 
}, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n },\n\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] 
}\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": 
\"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs,\n \"Unit\": \"CUs\",\n \"peak\": $numCU,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": 
{\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n \"Percent of Peak - PoP\": { 
\"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n },\n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n \"Percent of Peak - PoP\": { 
\"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }}\n\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { 
\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2] }] }\n },\n \n \"mfma_val\": {\n 
\"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2, 4] }] }\n },\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}, \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", 
\"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU 
FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, 
\"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs2,\n \"Unit\": \"CUs\",\n \"peak\": $numCU2,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs2] }, $numCU2]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk2, $numCU2, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": 
\"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numSQC2]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n }, \n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, 
\"&eaReadBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU2, $numCU2] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$maxWavesPerCU2, $numCU2] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }, $numSQC2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC2, { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]\n }}\n\n ]);", + "type": "table" + } + ], + "title": "Speed of Light", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Metric 1": 0, + "Metric 2": 7, + "Percent of Peak - PoP 1": 5, + "Percent of Peak - PoP 2": 6, + "Unit 1": 8, + "Unit 2": 9, + "Value 1": 1, + "Value 2": 2, + "peak 1": 3, + "peak 2": 4 + }, + "renameByName": { + "Percent of Peak - PoP": "Pct-of-Peak", + "Percent of Peak - PoP 1": "Pct-of-Peak (Current)", + "Percent of Peak - PoP 2": "Pct-of-Peak (Baseline)", + "Unit": "", + "Value": "Avg", + "Value 1": "Avg (Current)", + "Value 2": "Avg (Baseline)", + "peak": "Theoretical Max", + "peak 1": "Theoretical Max (Current)", + "peak 2": "Theoretical Max (Baseline)" + } + } + } + ], + "type": "table" + }, + { + 
"datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 175, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n],\n{ allowDiskUse: true }\n);", + "type": "table" + } + ], + "title": "Dispatch IDs - Current", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 215, + "options": { + "footer": { + "fields": "", + "reducer": [ 
+ "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Index\",\n \"Kernel Name\": \"&KernelName\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Dispatch IDs - Baseline", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Speed-of-Light", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 36, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 157, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + } + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "u5Z2zJhnk" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n 
\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"elapsedTime1\": {\n \"$divide\": [{\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}, 1000]\n }\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"elapsedTime1\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Kernel Time Histogram", + "transparent": true, + "type": "histogram" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 123 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": 
"custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Performance" + }, + "properties": [ + { + "id": 
"custom.width", + "value": 144 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Peak FLOPs" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 213, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "L1 Cache (Bytes)" + } + ] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": 
[\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } 
},\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128 ]} \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n \n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n }}\n]);", + "type": "table" + } + ], + "title": "Top Kernels", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + 
"LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Name", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "peak_flops": "Peak FLOPs", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS " + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 87 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 153 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, 
+ { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS (Bytes)" + }, + "properties": [ + { + "id": "custom.width", + "value": 98 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + }, + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dispatch" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 251, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&Index\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] 
},\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", 
\"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n 
]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n\n }}\n]);", + "type": "table" + } + ], + "title": "Top Dispatches", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "peak_flops": 19, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS ", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Dispatch", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Kernel Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 40, + "panels": [ + { + "description": "All transaction units default to Billion, when per-sec norm is used", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" 
+ }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 285, + "options": { + "addAllIDs": false, + "captureMappings": false, + "eventAutoComplete": true, + "eventSource": "options.animateLogo(svgmap, data);\r\nconsole.log(\"Starting render\");\r\nlet buff = data.series[0].fields[2].values.buffer;\r\nlet valueCount = buff.length;\r\nconsole.log(\"The buff is \", valueCount, \" long\");\r\n\r\nsvgmap.wave_life_.text(buff[0]);\r\nsvgmap.active_cu_.text(buff[1]);\r\nsvgmap.salu_.text(buff[2]);\r\nsvgmap.smem_.text(buff[3]);\r\nsvgmap.valu_.text(buff[4]);\r\nsvgmap.mfma_.text(buff[5]);\r\nsvgmap.vmem_.text(buff[6]);\r\nsvgmap.lds_.text(buff[7]);\r\nsvgmap.gws_.text(buff[8]);\r\nsvgmap.br_.text(buff[9]);\r\nsvgmap.vgpr_.text(buff[10]);\r\nsvgmap.sgpr_.text(buff[11]);\r\nsvgmap.lds_alloc_.text(buff[12]);\r\nsvgmap.scratch_alloc_.text(buff[13]);\r\nsvgmap.wavefronts_.text(buff[14]);\r\nsvgmap.workgroups_.text(buff[15]);\r\nsvgmap.lds_req_.text(buff[16]);\r\nsvgmap.il1_fetch_.text(buff[17]);\r\nsvgmap.il1_hit_.text(buff[18]);\r\nsvgmap.il1_l2_rd_.text(buff[19]);\r\nsvgmap.sl1_rd_.text(buff[20]);\r\nsvgmap.sl1_hit_.text(buff[21]);\r\nsvgmap.sl1_l2_rd_.text(buff[22]);\r\nsvgmap.sl1_l2_wr_.text(buff[23]);\r\nsvgmap.sl1_l2_atom_.text(buff[24]);\r\nsvgmap.vl1_rd_.text(buff[25]);\r\nsvgmap.vl1_wr_.text(buff[26]);\r\nsvgmap.vl1_atom_.text(buff[27]);\r\nsvgmap.vl1_hit_.text(buff[28]);\r\nsvgmap.vl1_lat_.text(buff[29]);\r\nsvgmap.vl1_l2_rd_.text(buff[30]);\r\nsvgmap.vl1_l2_wr_.text(buff[31]);\r\nsvgmap.vl1_l2_atom_.text(buff[32]);\r\nsvgmap.l2_rd_.text(buff[33]);\r\nsvgmap.l2_wr_.text(buff[34])\r\nsvgmap.l2_atom_.text(buff[35]);\r\nsvgmap.l2_hit_.text(buff[36]);\r\nsvgmap.l2_rd_lat_.text(buff[37]);\r\nsvgmap.l2_wr_lat_.text(buff[38]);\r\nsvgmap.fabric_rd_lat_.text(buff[39]);\r\nsvgmap.fabric_w
r_lat_.text(buff[40]);\r\nsvgmap.fabric_atom_lat_.text(buff[41]);\r\nsvgmap.l2_fabric_rd_.text(buff[42]);\r\nsvgmap.l2_fabric_wr_.text(buff[43]);\r\nsvgmap.l2_fabric_atom_.text(buff[44]);\r\nsvgmap.hbm_rd_.text(buff[45]);\r\nsvgmap.hbm_wr_.text(buff[46]);\r\nsvgmap.lds_util_.text(buff[47]);\r\nsvgmap.vl1_coales_.text(buff[48]);\r\nsvgmap.vl1_stall_.text(buff[49]);\r\nsvgmap.wave_occ_.text(buff[50]);\r\nsvgmap.lds_lat_.text(buff[51]);\r\nsvgmap.il1_lat_.text(buff[52]);\r\nsvgmap.sl1_lat_.text(buff[53]);\r\nsvgmap.gds_req_.text(buff[54]);", + "initAutoComplete": true, + "initSource": "options.animateLogo = (svgmap, data) => {\r\n \r\n}\r\n ", + "svgMappings": [ + { + "mappedName": "wave_life_", + "svgId": "wave_life" + }, + { + "mappedName": "wave_occ_", + "svgId": "wave_occ" + }, + { + "mappedName": "salu_", + "svgId": "salu" + }, + { + "mappedName": "smem_", + "svgId": "smem" + }, + { + "mappedName": "valu_", + "svgId": "valu" + }, + { + "mappedName": "mfma_", + "svgId": "mfma" + }, + { + "mappedName": "vmem_", + "svgId": "vmem" + }, + { + "mappedName": "lds_", + "svgId": "lds" + }, + { + "mappedName": "gws_", + "svgId": "gws" + }, + { + "mappedName": "br_", + "svgId": "br" + }, + { + "mappedName": "active_cu_", + "svgId": "active_cu" + }, + { + "mappedName": "vgpr_", + "svgId": "vgpr" + }, + { + "mappedName": "sgpr_", + "svgId": "sgpr" + }, + { + "mappedName": "lds_alloc_", + "svgId": "lds_alloc" + }, + { + "mappedName": "scratch_alloc_", + "svgId": "scratch_alloc" + }, + { + "mappedName": "wavefronts_", + "svgId": "wavefronts" + }, + { + "mappedName": "workgroups_", + "svgId": "workgroups" + }, + { + "mappedName": "lds_req_", + "svgId": "lds_req" + }, + { + "mappedName": "vl1_wr_", + "svgId": "vl1_wr" + }, + { + "mappedName": "vl1_atom_", + "svgId": "vl1_atom" + }, + { + "mappedName": "sl1_rd_", + "svgId": "sl1_rd" + }, + { + "mappedName": "il1_fetch_", + "svgId": "il1_fetch" + }, + { + "mappedName": "lds_lat_", + "svgId": "lds_lat" + }, + { + "mappedName": 
"lds_bw_", + "svgId": "lds_bw" + }, + { + "mappedName": "lds_util_", + "svgId": "lds_util" + }, + { + "mappedName": "vl1_hit_", + "svgId": "vl1_hit" + }, + { + "mappedName": "vl1_lat_", + "svgId": "vl1_lat" + }, + { + "mappedName": "vl1_coales_", + "svgId": "vl1_coales" + }, + { + "mappedName": "vl1_stall_", + "svgId": "vl1_stall" + }, + { + "mappedName": "sl1_hit_", + "svgId": "sl1_hit" + }, + { + "mappedName": "sl1_lat_", + "svgId": "sl1_lat" + }, + { + "mappedName": "il1_hit_", + "svgId": "il1_hit" + }, + { + "mappedName": "il1_lat_", + "svgId": "il1_lat" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "il1_l2_rd_", + "svgId": "il1_l2_rd" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "l2_rd_", + "svgId": "l2_rd" + }, + { + "mappedName": "l2_wr_", + "svgId": "l2_wr" + }, + { + "mappedName": "l2_atom_", + "svgId": "l2_atom" + }, + { + "mappedName": "l2_hit_", + "svgId": "l2_hit" + }, + { + "mappedName": "l2_rd_lat_", + "svgId": "l2_rd_lat" + }, + { + "mappedName": "l2_wr_lat_", + "svgId": "l2_wr_lat" + }, + { + "mappedName": "l2_fabric_rd_", + "svgId": "l2_fabric_rd" + }, + { + "mappedName": "l2_fabric_wr_", + "svgId": "l2_fabric_wr" + }, + { + "mappedName": "l2_fabric_atom_", + "svgId": "l2_fabric_atom" + }, + { + "mappedName": "fabric_rd_lat_", + "svgId": "fabric_rd_lat" + }, + { + "mappedName": "fabric_wr_lat_", + "svgId": "fabric_wr_lat" + }, + { + "mappedName": "fabric_atom_lat_", + "svgId": "fabric_atom_lat" + }, + { + "mappedName": "fabric_hbm_rd_", + "svgId": "fabric_hbm_rd" + }, + { + "mappedName": "fabric_hbm_wr_", + "svgId": "fabric_hbm_wr" + }, + { + "mappedName": "vl1_rd_", + "svgId": "vl1_rd" + }, + { + "mappedName": 
"vl1_l2_rd_", + "svgId": "vl1_l2_rd" + }, + { + "mappedName": "vl1_l2_wr_", + "svgId": "vl1_l2_wr" + }, + { + "mappedName": "vl1_l2_atom_", + "svgId": "vl1_l2_atom" + }, + { + "mappedName": "hbm_rd_", + "svgId": "hbm_rd" + }, + { + "mappedName": "hbm_wr_", + "svgId": "hbm_wr" + } + ], + "svgSource": "\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n Wave Occupancy\r\n \r\n Wave Life\r\n \r\n \r\n \r\n xGMI /\r\n PCIe\r\n \r\n GMI\r\n \r\n HBM\r\n \r\n Fabric\r\n \r\n \r\n SALU:\r\n 00000\r\n \r\n \r\n SMEM:\r\n 00000\r\n \r\n \r\n VALU:\r\n 00000\r\n \r\n \r\n MFMA:\r\n 00000\r\n \r\n \r\n VMEM:\r\n 00000\r\n \r\n \r\n LDS:\r\n 00000\r\n \r\n \r\n GWS:\r\n 00000\r\n \r\n \r\n Br:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n \r\n \r\n cycles\r\n Atomic:\r\n 00000\r\n \r\n \r\n Rd:\r\n 00000\r\n \r\n \r\n Wr:\r\n \r\n \r\n 00000\r\n \r\n \r\n Atomic:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n 00000\r\n Rd:\r\n 00000\r\n Wr:\r\n 00000\r\n Req:\r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n per-GCD\r\n cycles\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n Wave 0 Instr buff\r\n Wave N-1 Instr buff\r\n Active CUs\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Util:\r\n 00000\r\n \r\n \r\n %\r\n Coales:\r\n 00000\r\n Exec\r\n Instr Buff\r\n Instr Dispatch\r\n LDS\r\n Vector L1 Cache\r\n Scalar L1D Cache\r\n Instr L1 Cache\r\n L2 Cache\r\n 00000\r\n 
Req:\r\n \r\n \r\n %\r\n Stall:\r\n 00000\r\n 00000\r\n Fetch:\r\n 0000000\r\n 00000\r\n 000/000\r\n \r\n Latency\r\n \r\n LDS Alloc:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n \r\n Scratch Alloc:\r\n \r\n 00000\r\n \r\n Wavefronts:\r\n \r\n 00000\r\n \r\n Workgroups:\r\n \r\n 00000\r\n \r\n VGPRs:\r\n \r\n 00000\r\n \r\n SGPRs:\r\n \r\n 00000\r\n \r\n \r\n 00000\r\n Rd:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n Latency\r\n \r\n \r\n \r\n \r\n Text is not SVG - cannot display\r\n \r\n \r\n" + }, + "pluginVersion": "8.4.0", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n 
\"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n },\n \"br\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&arch_vgpr\"\n },\n \"sgpr\": {\n \"$avg\": \"&sgpr\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&lds\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&scr\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { 
\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 
0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$numCU\"]}\n },\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": \"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n \"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n 
},\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": \"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": 
\"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { \"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n \"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_LEVEL_WAVES", + "target": "$Workload1.SQ_LEVEL_WAVES.aggregate([\n {\"$match\": {\n 
\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_occ\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\",\"&GRBM_GUI_ACTIVE\"] }, $numActiveCUs]}\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Alias\": \"wave_occ_\",\n \"Value\":{ \"$round\": [\"&wave_occ\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "$Workload1.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"lds_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&SQ_INSTS_LDS\", 0] },\n { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\"] },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"LDS Lat\",\n \"Alias\": \"lds_lat_\",\n \"Value\":{ \"$round\": [\"&lds_lat\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_ICACHE_INFLIGHT", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": 
\"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"il1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_ICACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_ICACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"IL1 Lat\",\n \t\t\t\"Alias\": \"il1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&il1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_DCACHE_INFLIGHT_LEVEL", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Index\",\n\t\t\"foreignField\": \"Index\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": 
\"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"sl1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_DCACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_DCACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"vL1D Lat\",\n \t\t\t\"Alias\": \"sl1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&sl1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + } + ], + "title": "Memory Chart (Normalization: $normUnit\")", + "transformations": [ + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "string", + "targetField": "Value" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "amd-custom-svg" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Memory Chart Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 241, + "panels": [ + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 253, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + 
"rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm" + }, + "name": "HBM-VLAU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2" + }, + "name": "L2-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1" + }, + "name": "vL1D-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": 
"xrange", + "y": "roofline_LDS" + }, + "name": "LDS-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + 
"width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + 
"text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA" + }, + "name": "HBM-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA" + }, + "name": "L2-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA" + }, + "name": "vL1D-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA" + }, + "name": "LDS-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + 
"colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "rawQuery": true, + "refId": "HBM-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": 
\"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", 
\"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"&high_flop\"\n }\n },\n\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": 
"${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n 
\"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, 
+ "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, 
\"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n 
}}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n 
{\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP32/FP64 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + }, + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 312, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + 
"loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + 
"color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_fp16" + }, + "name": "HBM-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": 
false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_fp16" + }, + "name": "L2-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_fp16" + }, + "name": "vL1D-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_fp16" + }, + "name": "LDS-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": 
"hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_i8" + }, + "name": "HBM-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_i8" + }, + "name": "L2-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_i8" + }, + "name": "vL1D-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_i8" + }, + "name": "LDS-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": 
"#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&HBMBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&HBMBw\"]}, \"$high_flop\" ]\n 
},\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&HBMBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", 
\"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n 
\"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&KernelName\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, 
\"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&KernelName\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { 
\"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_i8\": {\n 
\"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" 
]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": 
\"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP16/INT8 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Roofline Analysis", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 2, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 13, + "x": 0, + "y": 6 + }, + "id": 6, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n 
\"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": 
[\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": 
\"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n\n 
{\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, 
\"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": 
{\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Fetcher", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + 
"renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 171 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 147 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 11, + "x": 13, + "y": 6 + }, + "id": 4, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { +
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 
0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n 
}},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": 
\"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": 
\"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ 
\"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n 
{\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n 
{\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Compute", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Metric 1": "", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Command Processor (CPC/CPF)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 102, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 101 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max 1" 
+ }, + "properties": [ + { + "id": "custom.width", + "value": 145 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 97 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 123 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 106, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n 
\"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": 
\"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n 
\"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + 
"Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 285 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 242 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 104, + "options": { + "footer": { + 
"fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": 
\"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n 
}},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": 
\"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": 
{\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": 
\"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n 
\"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": 
\"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Resource Allocation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Shader Processor Input (SPI)", + "type": "row" + }, + { + "collapsed": true, + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 185, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 142 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 196 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 174 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.width", + "value": 168 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min" + }, + "properties": [ + { + "id": "custom.width", + "value": 272 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 225 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + 
"properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "interval": "12h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n 
\"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n 
\"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\n \"$avg\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&arch_vgpr\"\n ] \n }\n },\n \"vgprs_min\":{\n \"$min\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&arch_vgpr\"\n ] \n }\n },\n \"vgprs_max\":{\n \"$max\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&arch_vgpr\"\n ] \n }\n },\n\n \"agprs_avg\":{\n \"$avg\": \"&accum_vgpr\"\n },\n \"agprs_min\":{\n \"$min\": \"&accum_vgpr\"\n },\n \"agprs_max\":{\n \"$max\": \"&accum_vgpr\"\n },\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": 
\"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"AGPRs\",\n \"Avg\": \"&agprs_avg\",\n \"Min\": \"&agprs_min\",\n \"Max\": \"&agprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Wavefront Launch Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true, + "Units 2": true, + "metric 2": true + }, + "indexByName": { + "Avg 1": 1, + 
"Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 223 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 34, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { 
\"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] 
},\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n 
\"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&EndNs\", \"&BeginNs\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait 
Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "Wavefront Runtime Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg": "", + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max 
(Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "", + "Unit 2": "" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Wavefront", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 209, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 12, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n 
} \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_max\": {\n \"$max\": { 
\"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"avg\": \"&valu_avg\",\n \"min\": \"&valu_min\",\n \"max\": \"&valu_max\"\n },\n {\n \"metric\": \"VMEM\",\n \"avg\": \"&vmem_avg\",\n \"min\": \"&vmem_min\",\n \"max\": \"&vmem_max\"\n },\n {\n \"metric\": \"LDS\",\n \"avg\": \"&lds_avg\",\n \"min\": \"&lds_min\",\n \"max\": \"&lds_max\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"avg\": \"&mfma_avg\",\n \"min\": \"&mfma_min\",\n \"max\": \"&mfma_max\"\n },\n {\n \"metric\": \"SALU\",\n \"avg\": \"&salu_avg\",\n \"min\": \"&salu_min\",\n \"max\": \"&salu_max\"\n },\n {\n \"metric\": \"SMEM\",\n \"avg\": \"&smem_avg\",\n \"min\": \"&smem_min\",\n \"max\": \"&smem_max\"\n },\n {\n \"metric\": \"Branch\",\n \"avg\": \"&branch_avg\",\n \"min\": \"&branch_min\",\n \"max\": \"&branch_max\"\n },\n {\n \"metric\": \"GDS\",\n \"avg\": \"&gds_avg\",\n \"min\": \"&gds_min\",\n \"max\": \"&gds_max\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n 
\"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n 
\"valu_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector (Baseline)\",\n \"avg\": \"&valu_avg\",\n \"min\": \"&valu_min\",\n \"max\": \"&valu_max\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"avg\": \"&vmem_avg\",\n \"min\": \"&vmem_min\",\n \"max\": \"&vmem_max\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"avg\": \"&lds_avg\",\n \"min\": \"&lds_min\",\n \"max\": \"&lds_max\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"avg\": \"&mfma_avg\",\n \"min\": \"&mfma_min\",\n \"max\": \"&mfma_max\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"avg\": \"&salu_avg\",\n \"min\": \"&salu_min\",\n \"max\": \"&salu_max\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"avg\": \"&smem_avg\",\n \"min\": \"&smem_min\",\n \"max\": \"&smem_max\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"avg\": \"&branch_avg\",\n \"min\": \"&branch_min\",\n \"max\": \"&branch_max\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"avg\": \"&gds_avg\",\n \"min\": \"&gds_min\",\n \"max\": \"&gds_max\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Instruction Mix", + 
"transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "max": true, + "min": true + }, + "indexByName": {}, + "renameByName": { + "avg": "" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 24, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 24, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "limit": 100, + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] 
}\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n 
\"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ 
\"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA (Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL 
(Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VALU Arithmetic Instr Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 275, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" 
]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n 
\"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VMEM Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "VMEM Instr", + "type 1": "VMEM Instr" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 16, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n 
],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { 
\"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "MFMA Arithmetic Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "MFMA Instr", + "type 1": "MFMA Instr" + } + } + } + ], + "transparent": true, + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Instruction Mix", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 8, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 211, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n 
\"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] } },\n\n \"instr_val\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 64, 2 ]}, 1000] }] },\n 
\"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $numCU2, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Compute Pipeline", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "mfma_flops_bf16_pop 1": 4, + "mfma_flops_bf16_pop 2": 5, + "mfma_flops_f16_pop 1": 6, + "mfma_flops_f16_pop 2": 7, + "mfma_flops_f32_pop 1": 8, + "mfma_flops_f32_pop 2": 9, + "mfma_flops_f64_pop 1": 10, + "mfma_flops_f64_pop 2": 11, + "mfma_flops_i8_pop 1": 12, + "mfma_flops_i8_pop 2": 13, + "valu_flops_pop 1": 0, + "valu_flops_pop 2": 1, + "valu_iops_pop 1": 2, + "valu_iops_pop 2": 3 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + 
"Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "flops_pop": "FLOPs", + "flops_pop 1": "FLOPs (Current)", + "flops_pop 2": "FLOPs (Baseline)", + "iops_pop": "IOPs", + "iops_pop 1": "IOPs (Current)", + "iops_pop 2": "IOPs (Baseline)", + "mfma_flops_bf16_pop": "MFMA- BF16 (FLOPs)", + "mfma_flops_bf16_pop 1": "MFMA-BF16 (Cur)", + "mfma_flops_bf16_pop 2": "MFMA-BF16 (Baseline)", + "mfma_flops_f16_pop": "MFMA-F16 (FLOPs)", + "mfma_flops_f16_pop 1": "MFMA-F16 (Cur)", + "mfma_flops_f16_pop 2": "MFMA-F16 (Baseline)", + "mfma_flops_f32_pop": "MFMA-F32 (FLOPs)", + "mfma_flops_f32_pop 1": "MFMA-F32 (Cur)", + "mfma_flops_f32_pop 2": "MFMA-F32 (Baseline)", + "mfma_flops_f64_pop": "MFMA-F64 (FLOPs)", + "mfma_flops_f64_pop 1": "MFMA-F64 (Cur)", + "mfma_flops_f64_pop 2": "MFMA-F64 (Baseline)", + "mfma_flops_i8_pop": "MFMA-i8 (IOPs)", + "mfma_flops_i8_pop 1": "MFMA-I8 (Cur)", + "mfma_flops_i8_pop 2": "MFMA-I8 (Baseline)", + "valu_flops_pop": "VALU (FLOPs)", + "valu_flops_pop 1": "VALU FLOPs (Cur)", + "valu_flops_pop 2": "VALU FLOPs (Baseline)", + "valu_iops_pop": "VALU (IOPs)", + "valu_iops_pop 1": "VALU IOPs (Cur)", + "valu_iops_pop 2": "VALU IOPs (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 257, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + 
"pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n 
\"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU ] }\n },\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n 
\"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg\": \"&avg_ipcAvg\",\n \"Min\": \"&min_ipcAvg\",\n \"Max\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg\": \"&avg_ipcIssue\",\n \"Min\": \"&min_ipcIssue\",\n \"Max\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg\": \"&avg_saluUtil\",\n \"Min\": \"&min_saluUtil\",\n \"Max\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg\": \"&avg_valuUtil\",\n \"Min\": \"&min_valuUtil\",\n \"Max\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg\": \"&avg_unpredthreads_val\",\n \"Min\": \"&min_unpredthreads_val\",\n \"Max\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg\": \"&avg_mfmaUtil\",\n \"Min\": \"&min_mfmaUtil\",\n \"Max\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg\": \"&avg_mfmaInstrCycles\",\n \"Min\": \"&min_mfmaInstrCycles\",\n \"Max\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" 
+ }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n 
\"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$numCU2 ] }\n },\n\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $numCU2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": 
{\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg 2\": \"&avg_ipcAvg\",\n \"Min 2\": \"&min_ipcAvg\",\n \"Max 2\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg 2\": \"&avg_ipcIssue\",\n \"Min 2\": \"&min_ipcIssue\",\n \"Max 2\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg 2\": \"&avg_saluUtil\",\n \"Min 2\": \"&min_saluUtil\",\n \"Max 2\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg 2\": \"&avg_valuUtil\",\n \"Min 2\": \"&min_valuUtil\",\n \"Max 2\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg 2\": \"&avg_unpredthreads_val\",\n \"Min 2\": \"&min_unpredthreads_val\",\n \"Max 2\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg 2\": \"&avg_mfmaUtil\",\n \"Min 2\": \"&min_mfmaUtil\",\n \"Max 2\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg 2\": \"&avg_mfmaInstrCycles\",\n \"Min 2\": \"&min_mfmaInstrCycles\",\n \"Max 2\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Pipeline Stats", + 
"transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg 2": "Avg (Baseline)", + "Max 2": "Max (Baseline)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 96, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": 
"A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { 
\"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": 
[64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { 
\"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { 
\"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": 
[64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { 
\"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Arithmetic Operations", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + 
"indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 255, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM", + "target": "${Workload1}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n 
\"Avg\": \"&avg_vmemLat\",\n \"Min\": \"&min_vmemLat\",\n \"Max\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM", + "target": "${Workload1}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg\":\"&avg_smemLat\",\n \"Min\":\"&min_smemLat\",\n \"Max\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL", + "target": "${Workload1}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n 
{\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg\":\"&avg_instrFetchLat\",\n \"Min\":\"&min_instrFetchLat\",\n \"Max\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "${Workload1}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg\":\"&avg_ldsLat\",\n \"Min\":\"&min_ldsLat\",\n \"Max\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { 
\"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg 2\": \"&avg_vmemLat\",\n \"Min 2\": \"&min_vmemLat\",\n \"Max 2\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n 
\"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg 2\":\"&avg_smemLat\",\n \"Min 2\":\"&min_smemLat\",\n \"Max 2\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL2", + "target": "${Workload2}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg 2\":\"&avg_instrFetchLat\",\n \"Min 2\":\"&min_instrFetchLat\",\n \"Max 2\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS2", + "target": "${Workload2}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n 
\"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg 2\":\"&avg_ldsLat\",\n \"Min 2\":\"&min_ldsLat\",\n \"Max 2\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + } + ], + "title": "Memory Latencies", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Compute Pipeline", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 98, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 205, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n 
\"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n }\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: LDS", + "transformations": [ + { + "id": 
"concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Access Rate 1": 6, + "Access Rate 2": 7, + "Bandwith (Pct-of-Peak) 1": 0, + "Bandwith (Pct-of-Peak) 2": 1, + "Bank Conflict Rate 1": 2, + "Bank Conflict Rate 2": 3, + "Utilization 1": 4, + "Utilization 2": 5 + }, + "renameByName": { + "Access Rate 1": "Access Rate (Current)", + "Access Rate 2": "Access Rate (Baseline)", + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "Utilization 1": "Util (Current)", + "Utilization 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "min": -100000000000000000000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 
(Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 141 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n 
\"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": 
\"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n 
}\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { 
\"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": 
[\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "LDS Stats", + "transformations": [ + { + "id": "concatenate", + "options": { + 
"frameNameLabel": "frame", + "frameNameMode": "field" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Local Data Share (LDS)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 44, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 48, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { 
\"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\" ] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Instruction Cache ", + "transformations": [ + { + "id": 
"concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW (Pct-of-Peak) 1": 4, + "BW (Pct-of-Peak) 2": 5, + "Cache Hit 1": 6, + "Cache Hit 2": 7, + "Stall 1": 2, + "Stall 2": 3, + "Util 1": 0, + "Util 2": 1 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 259, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { 
\"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", 
\"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&avg_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&avg_hits\",\n \"Min\": \"&min_hits\",\n \"Max\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean\": \"&avg_misses\",\n \"Min\": \"&min_misses\",\n \"Max\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean\": \"&avg_misses_dup\",\n \"Min\": \"&min_misses_dup\",\n \"Max\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n \n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&avg_cacheHit\",\n \"Min\": \"&min_cacheHit\",\n \"Max\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": 
{\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&avg_req\",\n \"Min 2\": \"&min_req\",\n \"Max 2\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&avg_hits\",\n 
\"Min 2\": \"&min_hits\",\n \"Max 2\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&avg_misses\",\n \"Min 2\" : \"&min_misses\",\n \"Max 2\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean 2\": \"&avg_misses_dup\",\n \"Min 2\": \"&min_misses_dup\",\n \"Max 2\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&avg_cacheHit\",\n \"Min 2\": \"&min_cacheHit\",\n \"Max 2\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Instruction Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "L1I Metric": "", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Instruction Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 203, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, 
+ { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L1K-TC BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 54, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $numSQC]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "sY628IJnz" + }, + "hide": false, + "rawQuery": true, + "refId": "B", 
+ "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $numSQC2]}, {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Scalar L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW Pct-of-Peak 1": 0, + "BW Pct-of-Peak 2": 1, + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "Stall 1": 6, + "Stall 2": 7, + "Util 1": 4, + "Util 2": 5 + }, + "renameByName": { + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" 
+ }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 261, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"misses_avg\": 
{\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", 
\"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { 
\"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\": \"&req_min\",\n \"Max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\": \"&hits_min\",\n \"Max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean\": \"&misses_avg\",\n \"Min\": \"&misses_min\",\n \"Max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean\": \"&dup_misses_avg\",\n \"Min\": \"&dup_misses_min\",\n \"Max\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&cacheHit_avg\",\n \"Min\": \"&cacheHit_min\",\n \"Max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean\": \"&read1d_avg\",\n \"Min\": \"&read1d_min\",\n \"Max\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean\": \"&read2d_avg\",\n \"Min\": \"&read2d_min\",\n \"Max\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean\": \"&read4d_avg\",\n \"Min\": \"&read4d_min\",\n \"Max\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean\": \"&read8d_avg\",\n \"Min\": \"&read8d_min\",\n \"Max\": \"&read8d_max\",\n \"Unit\": 
{\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean\": \"&read16d_avg\",\n \"Min\": \"&read16d_min\",\n \"Max\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }}, \n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": 
[\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n 
\"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&req_avg\",\n \"Min 2\": \"&req_min\",\n \"Max 2\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&hits_avg\",\n \"Min 2\": \"&hits_min\",\n \"Max 2\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n 
{\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&misses_avg\",\n \"Min 2\": \"&misses_min\",\n \"Max 2\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean 2\": \"&dup_misses_avg\",\n \"Min 2\": \"&dup_misses_min\",\n \"Max 2\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&cacheHit_avg\",\n \"Min 2\": \"&cacheHit_min\",\n \"Max 2\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean 2\": \"&readReq_avg\",\n \"Min 2\": \"&readReq_min\",\n \"Max 2\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req (Total)\",\n \"Mean 2\": \"&writeReq_avg\",\n \"Min 2\": \"&writeReq_min\",\n \"Max 2\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean 2\": \"&atomicReq_avg\",\n \"Min 2\": \"&atomicReq_min\",\n \"Max 2\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean 2\": \"&read1d_avg\",\n \"Min 2\": \"&read1d_min\",\n \"Max 2\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean 2\": \"&read2d_avg\",\n \"Min 2\": \"&read2d_min\",\n \"Max 2\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean 2\": \"&read4d_avg\",\n \"Min 2\": \"&read4d_min\",\n \"Max 2\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean 2\": \"&read8d_avg\",\n \"Min 2\": \"&read8d_min\",\n \"Max 2\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean 2\": \"&read16d_avg\",\n \"Min 2\": \"&read16d_min\",\n \"Max 2\": \"&read16d_max\",\n 
\"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 134 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 52, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + 
}, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": 
\"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": 
[\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache - L2 Interface", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": 
{ + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Scalar L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 130, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 132, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": 
[${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": 
[\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ 
\"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ 
\"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": 
\"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cylces\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": 
{\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": 
[\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ 
\"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ 
\"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": 
\"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cylces\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "TA", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": 
"Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 136 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 134, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": 
${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": 
{\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n 
},\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": 
[100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n 
\"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "TD", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + 
"excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Texture Addresser and Texture Data (TA/TD)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 112, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 165, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n 
\"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": 
null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$subtract\":[\"&EndNs\", \"&BeginNs\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $numCU2]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Vector L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "128B Read Combining 1": 6, + "128B Read Combining 2": 7, + "Buffer Coalescing 1": 0, + "Buffer Coalescing 2": 1, + "Cache BW 1": 2, + "Cache BW 2": 3, + "Cache Hit 1": 4, + "Cache Hit 2": 5 + }, + "renameByName": { + "128B Read Combining 1": "128B Read Combining (Current)", + 
"128B Read Combining 2": "128B Read Combining(Baseline)", + "Buffer Coalescing 1": "Buf Coalescing (Current)", + "Buffer Coalescing 2": "Buf Coalescing (Baseline)", + "Cache BW 1": "Cache BW (Current)", + "Cache BW 2": "Cache BW (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Cache Util 1": "Cache Util (Current)", + "Cache Util 2": "Cache Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "color-background" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 52 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 199 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 116, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": 
{ \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": 
[100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": 
\"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 
0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n 
\"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Stalls", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true, + "unit 2": true + }, + "indexByName": { + "Max 1": 6, + "Max 2": 7, + "Mean 1": 2, + "Mean 2": 3, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 4, + "Min 2": 5, + "unit 1": 9, + "unit 2": 8 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg 
(Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "unit 1": "Unit" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 116 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 78 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 50 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 128, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": 
[${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { 
\"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": 
[\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, 
\"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n 
}},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n
},\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\":
\"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" 
]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n\n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", 
\"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n 
}},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\":
\"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \",
$normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Coherency", + "GroupCols": 2, + "GroupGap": 5, + "GroupLabelColor": "#FF9830", + "GroupLabelFontSize": "100%", + "GroupNameFilter": "", + "GroupRenamingRules": 
[], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Xfer", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FADE2A", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:172", + "Col": 2, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Mean", + "Selected": true + } + ], + "gridPos": 
{ + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 120, + "pluginVersion": "8.2.1", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n\n \"readNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n 
\"readRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \n \"writeNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \n \"atomicNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n 
\"atomicCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&readNC_avg\",\n \"Min\": \"&readNC_min\",\n \"Max\": \"&readNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&readUC_avg\",\n \"Min\": \"&readUC_min\",\n \"Max\": \"&readUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&readCC_avg\",\n \"Min\": \"&readCC_min\",\n \"Max\": \"&readCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&readRW_avg\",\n \"Min\": \"&readRW_min\",\n \"Max\": \"&readRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&writeRW_avg\",\n \"Min\": \"&writeRW_min\",\n \"Max\": \"&writeRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&writeNC_avg\",\n \"Min\": \"&writeNC_min\",\n \"Max\": \"&writeNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&writeUC_avg\",\n \"Min\": \"&writeUC_min\",\n \"Max\": \"&writeUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&writeCC_avg\",\n \"Min\": \"&writeCC_min\",\n \"Max\": 
\"&writeCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&atomicNC_avg\",\n \"Min\": \"&atomicNC_min\",\n \"Max\": \"&atomicNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&atomicUC_avg\",\n \"Min\": \"&atomicUC_min\",\n \"Max\": \"&atomicUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&atomicCC_avg\",\n \"Min\": \"&atomicCC_min\",\n \"Max\": \"&atomicCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&atomicRW_avg\",\n \"Min\": \"&atomicRW_min\",\n \"Max\": \"&atomicRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D - L2 Transactions Req $normUnit", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Units" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 124, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": 
true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": 
[\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": 
{\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": 
[\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Addr Translation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Units 1": 9, + "Units 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": 
"table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Vector L1 Data Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 56, + "panels": [ + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + }, + { + "id": "color" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Util" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 100 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache Hit" + }, + "properties": [ + { + "id": "max", + "value": 100 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Wr BW" + }, + "properties": [ + { + 
"id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 64, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": 
[\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$L2Banks2\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n 
{\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: L2 Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "L2 Util 1": 0, + "L2 Util 2": 1, + "L2-EA Rd BW 1": 4, + "L2-EA Rd BW 2": 5, + "L2-EA Wr BW 1": 6, + "L2-EA Wr BW 2": 7 + }, + "renameByName": { + "Cache Hit 1": "L2 Cache Hit (Current)", + "Cache Hit 2": "L2 Cache Hit (Baseline)", + "L2 Util 1": "L2 Util (Current)", + "L2 Util 2": "L2 Util (Baseline)", + "L2-EA Rd BW - GB/s 1": "L2-EA RD BW (Current)", + "L2-EA Rd BW - GB/s 2": "L2-EA RD BW (baseline)", + "L2-EA Rd BW 1": "L2-EA Rd BW (Current)", + "L2-EA Rd BW 2": "L2-EA Rd BW (Baseline)", + "L2-EA Wr BW - GB/s 1": "L2-EA WR BW (Current)", + "L2-EA Wr BW - GB/s 2": "L2-EA WR BW (Baseline)", + "L2-EA Wr BW 1": "L2-EA Wr BW (Current)", + "L2-EA Wr BW 2": "L2-EA Wr BW (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 62, + "options": { + "footer": { 
+ "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { 
\"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n 
}},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n 
\"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n 
\"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n 
\"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per 
Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", 
\"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ 
\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": 
[\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": 
\"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Transactions", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 178 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 
+ } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + } + ] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 58, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n 
\"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", 
\"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n 
\"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n 
\"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": 
[\"\", $normUnit]}\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", 
\"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ 
\"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { 
\"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", 
$normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "L2 Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + 
"excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Transaction", + "GroupCols": 1, + "GroupGap": 5, + "GroupLabelColor": "#FADE2A", + "GroupLabelFontSize": "120%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Metric", + "LabelColor": "#ffffff", + "LabelFontSize": "80%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + 
"OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FF9830", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:81", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 60, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"ioStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] 
}\n },\n \"ioStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"ioStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"creditStarvation_avg\": {\n 
\"$avg\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_min\": {\n \"$min\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_max\": {\n \"$max\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n } \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_READ_avg\",\n \"Min\": \"&ioStall_READ_min\",\n \"Max\": \"&ioStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_READ_avg\",\n \"Min\": \"&gmiStall_READ_min\",\n \"Max\": \"&gmiStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_READ_avg\",\n \"Min\": \"&hbmStall_READ_min\",\n \"Max\": \"&hbmStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_WRITE_avg\",\n \"Min\": \"&ioStall_WRITE_min\",\n \"Max\": \"&ioStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_WRITE_avg\",\n \"Min\": \"&gmiStall_WRITE_min\",\n \"Max\": \"&gmiStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_WRITE_avg\",\n \"Min\": \"&hbmStall_WRITE_min\",\n \"Max\": \"&hbmStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Credit Starvation\",\n \"Transaction\": \"Write\",\n \"Target\": \"Fabric\",\n \"Avg\": \"&creditStarvation_avg\",\n 
\"Min\": \"&creditStarvation_min\",\n \"Max\": \"&creditStarvation_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Interface Stalls (Cycles $normUnit)", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 66, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 314, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n 
\"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"mean_hit_rate\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n 
]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] 
},\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_hit_rate\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\",
\"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n {
\"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\",
\"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_hit_rate\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\":
[\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\",
\"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_hit_rate\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n {
\"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100,
\"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n {
\"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_req\": {\n \"$min\":{\n
\"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": 
\"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_read_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n 
{\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_read_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": 
\"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_read_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_read_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": 
\"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_write_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_write_req\": {\n \"$min\":{\n \"$divide\": 
[\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_write_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n 
{\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_write_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_atomic_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": 
\"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_atomic_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n 
{\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_atomic_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_atomic_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": 
\"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaRead_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_eaRead_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_eaRead_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": 
\"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_eaRead_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n 
{\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaWrite_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_eaWrite_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_eaWrite_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_eaWrite_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaAtomic_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_eaAtomic_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": 
\"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_eaAtomic_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n 
{\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_eaAtomic_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaRead_lat\": {\n \"$avg\":{\n 
\"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n 
\"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_eaRead_lat\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n 
\"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_eaRead_lat\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n 
\"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n 
\"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_eaRead_lat\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n 
\"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_eaWrite_lat\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n 
\"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_eaWrite_lat\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n 
\"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n 
\"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_eaWrite_lat\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n 
\"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_eaWrite_lat\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n 
\"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n 
\"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_eaAtomic_lat\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n \"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n 
\"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_eaAtomic_lat\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n 
\"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_eaAtomic_lat\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n 
\"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n \"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n 
\"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_eaAtomic_lat\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n \"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n 
\"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_ea_read_stall_io_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n 
{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_read_stall_io_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_read_stall_io_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n 
{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_read_stall_io_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_read_stall_gmi_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_read_stall_gmi_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_read_stall_gmi_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_read_stall_gmi_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n 
{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_read_stall_dram_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_read_stall_dram_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_read_stall_dram_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_read_stall_dram_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_io_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_io_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_io_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_io_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_gmi_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_gmi_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_gmi_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_gmi_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n 
{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_dram_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_dram_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_dram_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_dram_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_too_many\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_too_many\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_too_many\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_too_many\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Mean\": \"&mean_hit_rate\",\n \"Std Dev\": \"&stdDev_hit_rate\",\n \"Min\": \"&min_hit_rate\",\n \"Max\": \"&max_hit_rate\",\n \"Units\": \"pct\"\n },\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&mean_req\",\n \"Std Dev\": \"&stdDev_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L1 - L2 Read Req\",\n \"Mean\": \"&mean_read_req\",\n \"Std Dev\": \"&stdDev_read_req\",\n \"Min\": \"&min_read_req\",\n \"Max\": \"&max_read_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L1 - L2 Write Req\",\n \"Mean\": \"&mean_write_req\",\n \"Std Dev\": \"&stdDev_write_req\",\n \"Min\": \"&min_write_req\",\n \"Max\": \"&max_write_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L1 - L2 Atomic Req\",\n \"Mean\": \"&mean_atomic_req\",\n \"Std Dev\": \"&stdDev_atomic_req\",\n \"Min\": \"&min_atomic_req\",\n \"Max\": \"&max_atomic_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Read Req\",\n \"Mean\": \"&mean_eaRead_req\",\n \"Std Dev\": \"&stdDev_eaRead_req\",\n \"Min\": \"&min_eaRead_req\",\n \"Max\": \"&max_eaRead_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Write Req\", \n \"Mean\": \"&mean_eaWrite_req\",\n \"Std Dev\": \"&stdDev_eaWrite_req\",\n \"Min\": \"&min_eaWrite_req\",\n \"Max\": \"&max_eaWrite_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Atomic Req\",\n \"Mean\": \"&mean_eaAtomic_req\",\n \"Std Dev\": \"&stdDev_eaAtomic_req\",\n \"Min\": \"&min_eaAtomic_req\",\n \"Max\": 
\"&max_eaAtomic_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Read Lat\",\n \"Mean\": \"&mean_eaRead_lat\",\n \"Std Dev\": \"&stdDev_eaRead_lat\",\n \"Min\": \"&min_eaRead_lat\",\n \"Max\": \"&max_eaRead_lat\",\n \"Units\": \"Cycles\"\n },\n {\n \"Metric\": \"L2 - EA Write Lat\",\n \"Mean\": \"&mean_eaWrite_lat\",\n \"Std Dev\": \"&stdDev_eaWrite_lat\",\n \"Min\": \"&min_eaWrite_lat\",\n \"Max\": \"&max_eaWrite_lat\",\n \"Units\": \"Cycles\"\n },\n {\n \"Metric\": \"L2 - EA Atomic Lat\",\n \"Mean\": \"&mean_eaAtomic_lat\",\n \"Std Dev\": \"&stdDev_eaAtomic_lat\",\n \"Min\": \"&min_eaAtomic_lat\",\n \"Max\": \"&max_eaAtomic_lat\",\n \"Units\": \"Cycles\"\n },\n {\n \"Metric\": \"L2 - EA Read Stall (IO)\",\n \"Mean\": \"&mean_ea_read_stall_io_credit\",\n \"Std Dev\": \"&stdDev_ea_read_stall_io_credit\",\n \"Min\": \"&min_ea_read_stall_io_credit\",\n \"Max\": \"&max_ea_read_stall_io_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Read Stall (GMI)\",\n \"Mean\": \"&mean_ea_read_stall_gmi_credit\",\n \"Std Dev\": \"&stdDev_ea_read_stall_gmi_credit\",\n \"Min\": \"&min_ea_read_stall_gmi_credit\",\n \"Max\": \"&max_ea_read_stall_gmi_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Read Stall (DRAM)\",\n \"Mean\": \"&mean_ea_read_stall_dram_credit\",\n \"Std Dev\": \"&stdDev_ea_read_stall_dram_credit\",\n \"Min\": \"&min_ea_read_stall_dram_credit\",\n \"Max\": \"&max_ea_read_stall_dram_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Stall (IO)\",\n \"Mean\": \"&mean_ea_write_stall_io_credit\",\n \"Std Dev\": \"&stdDev_ea_write_stall_io_credit\",\n \"Min\": \"&min_ea_write_stall_io_credit\",\n \"Max\": \"&max_ea_write_stall_io_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Stall (GMI)\",\n \"Mean\": \"&mean_ea_write_stall_gmi_credit\",\n \"Std Dev\": 
\"&stdDev_ea_write_stall_gmi_credit\",\n \"Min\": \"&min_ea_write_stall_gmi_credit\",\n \"Max\": \"&max_ea_write_stall_gmi_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Stall (DRAM)\",\n \"Mean\": \"&mean_ea_write_stall_dram_credit\",\n \"Std Dev\": \"&stdDev_ea_write_stall_dram_credit\",\n \"Min\": \"&min_ea_write_stall_dram_credit\",\n \"Max\": \"&max_ea_write_stall_dram_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Starve\",\n \"Mean\": \"&mean_ea_write_stall_too_many\",\n \"Std Dev\": \"&stdDev_ea_write_stall_too_many\",\n \"Min\": \"&min_ea_write_stall_too_many\",\n \"Max\": \"&max_ea_write_stall_too_many\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Aggregate Stats (All 32 channels)", + "transformations": [], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + 
"LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 87, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 0 - 15) ", + "type": "michaeldmoore-multistat-panel" + }, + { + 
"Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + 
"ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 92, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 16 - 31) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], +
"LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:565", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 81, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Read Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", +
"DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", 
+ "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:656", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 82, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L 2 Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + 
"LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:697", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 83, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + 
"GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:750", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + 
"LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 84, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 
0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 85, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + 
"HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 91, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + 
"ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 189, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + 
"Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 195, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": 
"L2 - EA Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + 
"ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 191, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", 
+ "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 197, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": 
"Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + 
"ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 73 + }, + "id": 193, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Request (Channel 0 - 15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + 
"LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 73 + }, + "id": 199, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + 
"FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "", + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + 
"ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 81 + }, + "hideTimeOverride": false, + "id": 68, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b0_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[0]\"]}, \n { \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b0_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[0]\"}, \"&denom\"] } \n },\n \"b0_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[0]\"}, \"&denom\"] } \n },\n \"b0_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[0]\"}, \"&denom\"] } \n },\n \"b0_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[0]\"}, \"&denom\"] } \n },\n \"b0_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[0]\"}, \"&denom\"] }\n },\n \"b0_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ[0]\"}, \"&denom\"] } \n },\n \"b0_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[0]\"}, \"&denom\"] } \n },\n\n \"b0_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[0]\", \"&TCC_EA_RDREQ[0]\"]}, null] } },\n \"b0_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[0]\", \"&TCC_EA_WRREQ[0]\"]}, null] } },\n \"b0_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[0]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[0]\", \"&TCC_EA_ATOMIC[0]\"]}, null]}},\n\n \"b0_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"}, \"&denom\"] }},\n\n \n \"b1_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[1]\"]}, \n { \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b1_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[1]\"}, \"&denom\"] } \n },\n \"b1_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[1]\"}, 
\"&denom\"] } \n },\n \"b1_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[1]\"}, \"&denom\"] } \n },\n \"b1_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[1]\"}, \"&denom\"] }\n },\n \"b1_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[1]\"}, \"&denom\"] } \n },\n \"b1_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[1]\", \"&TCC_EA_RDREQ[1]\"]}, null] } },\n \"b1_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[1]\", \"&TCC_EA_WRREQ[1]\"]}, null] } },\n \"b1_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[1]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[1]\", \"&TCC_EA_ATOMIC[1]\"]}, null]}},\n\n \"b1_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"}, \"&denom\"] }},\n\n\n \"b2_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n 
{\"$ne\": [{ \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[2]\"]}, \n { \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b2_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[2]\"}, \"&denom\"] }\n },\n \"b2_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[2]\"}, \"&denom\"] } \n },\n \"b2_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[2]\"}, \"&denom\"] }\n },\n \"b2_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[2]\", \"&TCC_EA_RDREQ[2]\"]}, null] } },\n \"b2_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[2]\", \"&TCC_EA_WRREQ[2]\"]}, null] } },\n \"b2_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[2]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[2]\", \"&TCC_EA_ATOMIC[2]\"]}, null]}},\n\n \"b2_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"}, \"&denom\"] }},\n\n\n \n \"b3_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[3]\"]}, \n { \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b3_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[3]\"}, \"&denom\"] } \n },\n \"b3_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[3]\"}, \"&denom\"] } \n },\n \"b3_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[3]\"}, \"&denom\"] }\n },\n \"b3_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[3]\"}, \"&denom\"] }\n },\n \"b3_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[3]\"}, \"&denom\"] } \n },\n \"b3_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[3]\", \"&TCC_EA_RDREQ[3]\"]}, null] } },\n \"b3_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[3]\", \"&TCC_EA_WRREQ[3]\"]}, null] } },\n \"b3_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[3]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[3]\", \"&TCC_EA_ATOMIC[3]\"]}, null]}},\n\n \"b3_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"}, \"&denom\"] }},\n\n\n \n \"b4_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[4]\"]}, \n { \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b4_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[4]\"}, \"&denom\"] } \n },\n \"b4_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[4]\"}, \"&denom\"] } \n },\n \"b4_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[4]\"}, \"&denom\"] }\n },\n \"b4_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[4]\", \"&TCC_EA_RDREQ[4]\"]}, null] } },\n \"b4_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[4]\", 
\"&TCC_EA_WRREQ[4]\"]}, null] } },\n \"b4_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[4]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[4]\", \"&TCC_EA_ATOMIC[4]\"]}, null]}},\n\n \"b4_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"}, \"&denom\"] }},\n\n\n \n \"b5_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[5]\"]}, \n { \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b5_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[5]\"}, \"&denom\"] } \n },\n \"b5_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[5]\"}, \"&denom\"] } \n },\n \"b5_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[5]\"}, \"&denom\"] } \n },\n \"b5_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[5]\"}, \"&denom\"] } \n },\n 
\"b5_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[5]\", \"&TCC_EA_RDREQ[5]\"]}, null] } },\n \"b5_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[5]\", \"&TCC_EA_WRREQ[5]\"]}, null] } },\n \"b5_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[5]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[5]\", \"&TCC_EA_ATOMIC[5]\"]}, null]}},\n\n \"b5_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"}, \"&denom\"] }},\n\n\n \n \"b6_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[6]\"]}, \n { \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b6_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[6]\"}, \"&denom\"] } \n },\n \"b6_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[6]\"}, \"&denom\"] } \n },\n \"b6_writeReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[6]\"}, \"&denom\"] } \n },\n \"b6_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[6]\"}, \"&denom\"] } \n },\n \"b6_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[6]\"}, \"&denom\"] }\n },\n \"b6_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[6]\", \"&TCC_EA_RDREQ[6]\"]}, null] } },\n \"b6_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[6]\", \"&TCC_EA_WRREQ[6]\"]}, null] } },\n \"b6_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[6]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[6]\", \"&TCC_EA_ATOMIC[6]\"]}, null]}},\n\n \"b6_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"}, \"&denom\"] }},\n\n\n \n \"b7_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[7]\", 
\"&TCC_MISS[7]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[7]\"]}, \n { \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b7_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[7]\"}, \"&denom\"] } \n },\n \"b7_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[7]\"}, \"&denom\"] } \n },\n \"b7_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[7]\"}, \"&denom\"] } \n },\n \"b7_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[7]\"}, \"&denom\"] } \n },\n \"b7_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[7]\"}, \"&denom\"] }\n },\n \"b7_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[7]\", \"&TCC_EA_RDREQ[7]\"]}, null] } },\n \"b7_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[7]\", \"&TCC_EA_WRREQ[7]\"]}, null] } },\n \"b7_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[7]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[7]\", \"&TCC_EA_ATOMIC[7]\"]}, null]}},\n\n \"b7_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"}, \"&denom\"] }},\n\n\n \n \"b8_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[8]\"]}, \n { \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b8_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[8]\"}, \"&denom\"] } \n },\n \"b8_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[8]\"}, \"&denom\"] } \n },\n \"b8_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[8]\"}, \"&denom\"] } \n },\n \"b8_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[8]\", \"&TCC_EA_RDREQ[8]\"]}, null] } },\n \"b8_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[8]\", \"&TCC_EA_WRREQ[8]\"]}, null] } },\n \"b8_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[8]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[8]\", \"&TCC_EA_ATOMIC[8]\"]}, null]}},\n\n \"b8_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"}, \"&denom\"] }},\n\n\n \n \"b9_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[9]\"]}, \n { \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b9_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[9]\"}, \"&denom\"] } \n },\n \"b9_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[9]\"}, \"&denom\"] } \n },\n \"b9_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[9]\"}, \"&denom\"] } \n },\n \"b9_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[9]\", \"&TCC_EA_RDREQ[9]\"]}, null] } },\n \"b9_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[9]\", 
\"&TCC_EA_WRREQ[9]\"]}, null] } },\n \"b9_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[9]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[9]\", \"&TCC_EA_ATOMIC[9]\"]}, null]}},\n\n \"b9_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"}, \"&denom\"] }},\n\n\n \n \"b10_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[10]\"]}, \n { \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b10_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[10]\"}, \"&denom\"] } \n },\n \"b10_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[10]\"}, \"&denom\"] } \n },\n \"b10_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[10]\"}, \"&denom\"] } \n },\n \"b10_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[10]\"}, \"&denom\"] } 
\n },\n \"b10_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[10]\", \"&TCC_EA_RDREQ[10]\"]}, null] } },\n \"b10_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[10]\", \"&TCC_EA_WRREQ[10]\"]}, null] } },\n \"b10_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[10]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[10]\", \"&TCC_EA_ATOMIC[10]\"]}, null]}},\n\n \"b10_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"}, \"&denom\"] }},\n\n\n \n \"b11_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[11]\"]}, \n { \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b11_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[11]\"}, \"&denom\"] } \n },\n \"b11_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[11]\"}, 
\"&denom\"] } \n },\n \"b11_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[11]\"}, \"&denom\"] } \n },\n \"b11_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[11]\", \"&TCC_EA_RDREQ[11]\"]}, null] } },\n \"b11_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[11]\", \"&TCC_EA_WRREQ[11]\"]}, null] } },\n \"b11_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[11]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[11]\", \"&TCC_EA_ATOMIC[11]\"]}, null]}},\n\n \"b11_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"}, \"&denom\"] }},\n\n\n \n \"b12_hitRate\": 
{\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[12]\"]}, \n { \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b12_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[12]\"}, \"&denom\"] } \n },\n \"b12_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[12]\"}, \"&denom\"] } \n },\n \"b12_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[12]\"}, \"&denom\"] } \n },\n \"b12_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[12]\", \"&TCC_EA_RDREQ[12]\"]}, null] } },\n \"b12_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[12]\", \"&TCC_EA_WRREQ[12]\"]}, null] } },\n \"b12_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[12]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[12]\", \"&TCC_EA_ATOMIC[12]\"]}, null]}},\n\n \"b12_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"}, 
\"&denom\"] }},\n \"b12_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"}, \"&denom\"] }},\n\n\n \n \"b13_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[13]\"]}, \n { \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b13_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[13]\"}, \"&denom\"] } \n },\n \"b13_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[13]\"}, \"&denom\"] } \n },\n \"b13_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[13]\"}, \"&denom\"] } \n },\n \"b13_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[13]\"}, \"&denom\"] } \n },\n \"b13_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[13]\"}, \"&denom\"] }\n },\n \"b13_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[13]\", \"&TCC_EA_RDREQ[13]\"]}, null] } },\n \"b13_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[13]\", \"&TCC_EA_WRREQ[13]\"]}, null] } },\n \"b13_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[13]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[13]\", \"&TCC_EA_ATOMIC[13]\"]}, null]}},\n\n \"b13_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"}, \"&denom\"] }},\n\n\n \n \"b14_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[14]\"]}, \n { \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b14_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[14]\"}, \"&denom\"] } \n },\n \"b14_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[14]\"}, \"&denom\"] } \n },\n \"b14_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[14]\"}, \"&denom\"] } \n },\n \"b14_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[14]\", 
\"&TCC_EA_RDREQ[14]\"]}, null] } },\n \"b14_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[14]\", \"&TCC_EA_WRREQ[14]\"]}, null] } },\n \"b14_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[14]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[14]\", \"&TCC_EA_ATOMIC[14]\"]}, null]}},\n\n \"b14_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"}, \"&denom\"] }},\n\n\n \n \"b15_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[15]\"]}, \n { \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b15_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[15]\"}, \"&denom\"] } \n },\n \"b15_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[15]\"}, \"&denom\"] } \n },\n \"b15_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[15]\"}, \"&denom\"] } \n },\n \"b15_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[15]\"}, \"&denom\"] } \n },\n \"b15_eaReadReq\": 
{\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[15]\"}, \"&denom\"] }\n },\n \"b15_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[15]\", \"&TCC_EA_RDREQ[15]\"]}, null] } },\n \"b15_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[15]\", \"&TCC_EA_WRREQ[15]\"]}, null] } },\n \"b15_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[15]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[15]\", \"&TCC_EA_ATOMIC[15]\"]}, null]}},\n\n \"b15_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"}, \"&denom\"] }}\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"0\",\n \"Hit Rate\": \"&b0_hitRate\",\n \"Req\": \"&b0_req\",\n \"Read Req\": \"&b0_readReq\",\n \"Write Req\": \"&b0_writeReq\",\n \"AtomicReq\": \"&b0_atomicReq\",\n \"EA Read Req\": \"&b0_eaReadReq\",\n \"EA 
Write Req\": \"&b0_eaWriteReq\",\n \"EA AtomicReq\": \"&b0_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b0_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b0_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b0_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b0_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b0_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b0_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b0_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b0_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b0_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b0_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n },\n {\n \"Channel\": \"1\",\n \"Hit Rate\": \"&b1_hitRate\",\n \"Req\": \"&b1_req\",\n \"Read Req\": \"&b1_readReq\",\n \"Write Req\": \"&b1_writeReq\",\n \"AtomicReq\": \"&b1_atomicReq\",\n \"EA Read Req\": \"&b1_eaReadReq\",\n \"EA Write Req\": \"&b1_eaWriteReq\",\n \"EA AtomicReq\": \"&b1_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b1_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b1_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b1_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b1_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b1_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b1_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b1_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b1_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b1_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b1_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n },\n {\n \"Channel\": \"2\",\n \"Hit Rate\": \"&b2_hitRate\",\n \"Req\": \"&b2_req\",\n \"Read Req\": \"&b2_readReq\",\n \"Write Req\": \"&b2_writeReq\",\n \"AtomicReq\": \"&b2_atomicReq\",\n \"EA Read Req\": \"&b2_eaReadReq\",\n \"EA Write Req\": \"&b2_eaWriteReq\",\n \"EA AtomicReq\": \"&b2_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b2_eaReadLat\",\n \"EA Write Lat - cycles\": 
\"&b2_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b2_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b2_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b2_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b2_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b2_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b2_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b2_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b2_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"3\",\n \"Hit Rate\": \"&b3_hitRate\",\n \"Req\": \"&b3_req\",\n \"Read Req\": \"&b3_readReq\",\n \"Write Req\": \"&b3_writeReq\",\n \"AtomicReq\": \"&b3_atomicReq\",\n \"EA Read Req\": \"&b3_eaReadReq\",\n \"EA Write Req\": \"&b3_eaWriteReq\",\n \"EA AtomicReq\": \"&b3_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b3_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b3_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b3_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b3_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b3_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b3_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b3_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b3_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b3_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b3_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"4\",\n \"Hit Rate\": \"&b4_hitRate\",\n \"Req\": \"&b4_req\",\n \"Read Req\": \"&b4_readReq\",\n \"Write Req\": \"&b4_writeReq\",\n \"AtomicReq\": \"&b4_atomicReq\",\n \"EA Read Req\": \"&b4_eaReadReq\",\n \"EA Write Req\": \"&b4_eaWriteReq\",\n \"EA AtomicReq\": \"&b4_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b4_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b4_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b4_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b4_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": 
\"&b4_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b4_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b4_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b4_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b4_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b4_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"5\",\n \"Hit Rate\": \"&b5_hitRate\",\n \"Req\": \"&b5_req\",\n \"Read Req\": \"&b5_readReq\",\n \"Write Req\": \"&b5_writeReq\",\n \"AtomicReq\": \"&b5_atomicReq\",\n \"EA Read Req\": \"&b5_eaReadReq\",\n \"EA Write Req\": \"&b5_eaWriteReq\",\n \"EA AtomicReq\": \"&b5_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b5_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b5_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b5_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b5_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b5_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b5_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b5_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b5_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b5_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b5_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"6\",\n \"Hit Rate\": \"&b6_hitRate\",\n \"Req\": \"&b6_req\",\n \"Read Req\": \"&b6_readReq\",\n \"Write Req\": \"&b6_writeReq\",\n \"AtomicReq\": \"&b6_atomicReq\",\n \"EA Read Req\": \"&b6_eaReadReq\",\n \"EA Write Req\": \"&b6_eaWriteReq\",\n \"EA AtomicReq\": \"&b6_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b6_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b6_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b6_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b6_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b6_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b6_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b6_ea_write_stall_io_credit\",\n 
\"EA Write Stall - GMI\": \"&b6_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b6_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b6_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"7\",\n \"Hit Rate\": \"&b7_hitRate\",\n \"Req\": \"&b7_req\",\n \"Read Req\": \"&b7_readReq\",\n \"Write Req\": \"&b7_writeReq\",\n \"AtomicReq\": \"&b7_atomicReq\",\n \"EA Read Req\": \"&b7_eaReadReq\",\n \"EA Write Req\": \"&b7_eaWriteReq\",\n \"EA AtomicReq\": \"&b7_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b7_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b7_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b7_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b7_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b7_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b7_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b7_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b7_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b7_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b7_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"8\",\n \"Hit Rate\": \"&b8_hitRate\",\n \"Req\": \"&b8_req\",\n \"Read Req\": \"&b8_readReq\",\n \"Write Req\": \"&b8_writeReq\",\n \"AtomicReq\": \"&b8_atomicReq\",\n \"EA Read Req\": \"&b8_eaReadReq\",\n \"EA Write Req\": \"&b8_eaWriteReq\",\n \"EA AtomicReq\": \"&b8_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b8_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b8_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b8_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b8_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b8_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b8_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b8_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b8_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b8_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": 
\"&b8_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"9\",\n \"Hit Rate\": \"&b9_hitRate\",\n \"Req\": \"&b9_req\",\n \"Read Req\": \"&b9_readReq\",\n \"Write Req\": \"&b9_writeReq\",\n \"AtomicReq\": \"&b9_atomicReq\",\n \"EA Read Req\": \"&b9_eaReadReq\",\n \"EA Write Req\": \"&b9_eaWriteReq\",\n \"EA AtomicReq\": \"&b9_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b9_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b9_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b9_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b9_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b9_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b9_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b9_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b9_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b9_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b9_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"10\",\n \"Hit Rate\": \"&b10_hitRate\",\n \"Req\": \"&b10_req\",\n \"Read Req\": \"&b10_readReq\",\n \"Write Req\": \"&b10_writeReq\",\n \"AtomicReq\": \"&b10_atomicReq\",\n \"EA Read Req\": \"&b10_eaReadReq\",\n \"EA Write Req\": \"&b10_eaWriteReq\",\n \"EA AtomicReq\": \"&b10_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b10_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b10_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b10_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b10_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b10_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b10_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b10_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b10_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b10_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b10_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"11\",\n \"Hit Rate\": \"&b11_hitRate\",\n \"Req\": 
\"&b11_req\",\n \"Read Req\": \"&b11_readReq\",\n \"Write Req\": \"&b11_writeReq\",\n \"AtomicReq\": \"&b11_atomicReq\",\n \"EA Read Req\": \"&b11_eaReadReq\",\n \"EA Write Req\": \"&b11_eaWriteReq\",\n \"EA AtomicReq\": \"&b11_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b11_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b11_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b11_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b11_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b11_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b11_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b11_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b11_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b11_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b11_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"12\",\n \"Hit Rate\": \"&b12_hitRate\",\n \"Req\": \"&b12_req\",\n \"Read Req\": \"&b12_readReq\",\n \"Write Req\": \"&b12_writeReq\",\n \"AtomicReq\": \"&b12_atomicReq\",\n \"EA Read Req\": \"&b12_eaReadReq\",\n \"EA Write Req\": \"&b12_eaWriteReq\",\n \"EA AtomicReq\": \"&b12_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b12_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b12_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b12_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b12_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b12_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b12_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b12_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b12_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b12_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b12_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"13\",\n \"Hit Rate\": \"&b13_hitRate\",\n \"Req\": \"&b13_req\",\n \"Read Req\": \"&b13_readReq\",\n \"Write Req\": \"&b13_writeReq\",\n \"AtomicReq\": 
\"&b13_atomicReq\",\n \"EA Read Req\": \"&b13_eaReadReq\",\n \"EA Write Req\": \"&b13_eaWriteReq\",\n \"EA AtomicReq\": \"&b13_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b13_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b13_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b13_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b13_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b13_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b13_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b13_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b13_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b13_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b13_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"14\",\n \"Hit Rate\": \"&b14_hitRate\",\n \"Req\": \"&b14_req\",\n \"Read Req\": \"&b14_readReq\",\n \"Write Req\": \"&b14_writeReq\",\n \"AtomicReq\": \"&b14_atomicReq\",\n \"EA Read Req\": \"&b14_eaReadReq\",\n \"EA Write Req\": \"&b14_eaWriteReq\",\n \"EA AtomicReq\": \"&b14_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b14_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b14_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b14_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b14_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b14_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b14_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b14_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b14_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b14_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b14_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"15\",\n \"Hit Rate\": \"&b15_hitRate\",\n \"Req\": \"&b15_req\",\n \"Read Req\": \"&b15_readReq\",\n \"Write Req\": \"&b15_writeReq\",\n \"AtomicReq\": \"&b15_atomicReq\",\n \"EA Read Req\": \"&b15_eaReadReq\",\n \"EA Write Req\": \"&b15_eaWriteReq\",\n \"EA 
AtomicReq\": \"&b15_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b15_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b15_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b15_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b15_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b15_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b15_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b15_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b15_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b15_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b15_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + 
"LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 81 + }, + "id": 70, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": 
\"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b16_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[16]\"]}, \n { \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b16_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[16]\"}, \"&denom\"] } \n },\n \"b16_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[16]\"}, \"&denom\"] } \n },\n \"b16_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[16]\"}, \"&denom\"] } \n },\n \"b16_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[16]\"}, \"&denom\"] } \n },\n \"b16_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[16]\"}, \"&denom\"] }\n },\n \"b16_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[16]\"}, \"&denom\"] } \n },\n \"b16_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[16]\"}, \"&denom\"] } \n },\n\n \"b16_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[16]\", \"&TCC_EA_RDREQ[16]\"]}, null] } },\n \"b16_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[16]\", \"&TCC_EA_WRREQ[16]\"]}, null] } },\n \"b16_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[16]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[16]\", \"&TCC_EA_ATOMIC[16]\"]}, null]}},\n \"b16_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_gmi_credit\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"}, \"&denom\"] }},\n\n \n \"b17_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[17]\"]}, \n { \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b17_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[17]\"}, \"&denom\"] } \n },\n \"b17_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[17]\"}, \"&denom\"] } \n },\n \"b17_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[17]\"}, \"&denom\"] } \n },\n \"b17_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[17]\"}, \"&denom\"] }\n },\n \"b17_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[17]\"}, \"&denom\"] } \n },\n \"b17_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[17]\", \"&TCC_EA_RDREQ[17]\"]}, null] } },\n \"b17_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_WRREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[17]\", \"&TCC_EA_WRREQ[17]\"]}, null] } },\n \"b17_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[17]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[17]\", \"&TCC_EA_ATOMIC[17]\"]}, null]}},\n \"b17_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"}, \"&denom\"] }},\n\n \n \"b18_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[18]\"]}, \n { \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b18_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[18]\"}, \"&denom\"] }\n },\n \"b18_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[18]\"}, \"&denom\"] } \n },\n \"b18_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[18]\"}, \"&denom\"] }\n },\n \"b18_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[18]\"}, \"&denom\"] }\n },\n 
\"b18_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[18]\", \"&TCC_EA_RDREQ[18]\"]}, null] } },\n \"b18_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[18]\", \"&TCC_EA_WRREQ[18]\"]}, null] } },\n \"b18_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[18]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[18]\", \"&TCC_EA_ATOMIC[18]\"]}, null]}},\n \"b18_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"}, \"&denom\"] }},\n\n \n \"b19_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[19]\"]}, \n { \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b19_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[19]\"}, \"&denom\"] 
} \n },\n \"b19_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[19]\"}, \"&denom\"] } \n },\n \"b19_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[19]\"}, \"&denom\"] }\n },\n \"b19_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[19]\"}, \"&denom\"] }\n },\n \"b19_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[19]\"}, \"&denom\"] } \n },\n \"b19_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[19]\", \"&TCC_EA_RDREQ[19]\"]}, null] } },\n \"b19_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[19]\", \"&TCC_EA_WRREQ[19]\"]}, null] } },\n \"b19_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[19]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[19]\", \"&TCC_EA_ATOMIC[19]\"]}, null]}},\n \"b19_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"}, \"&denom\"] }},\n\n \n \"b20_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[20]\"]}, \n { \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b20_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[20]\"}, \"&denom\"] } \n },\n \"b20_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[20]\"}, \"&denom\"] } \n },\n \"b20_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[20]\"}, \"&denom\"] }\n },\n \"b20_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[20]\", \"&TCC_EA_RDREQ[20]\"]}, null] } },\n \"b20_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[20]\", \"&TCC_EA_WRREQ[20]\"]}, null] } },\n \"b20_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[20]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[20]\", \"&TCC_EA_ATOMIC[20]\"]}, null]}},\n \"b20_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"}, \"&denom\"] }},\n\n \n\n \"b21_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[21]\"]}, \n { \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b21_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[21]\"}, \"&denom\"] } \n },\n \"b21_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[21]\"}, \"&denom\"] } \n },\n \"b21_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[21]\"}, \"&denom\"] } \n },\n \"b21_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[21]\", \"&TCC_EA_RDREQ[21]\"]}, null] } },\n \"b21_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[21]\", \"&TCC_EA_WRREQ[21]\"]}, null] } },\n \"b21_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[21]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[21]\", \"&TCC_EA_ATOMIC[21]\"]}, null]}},\n 
\"b21_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"}, \"&denom\"] }},\n\n \n\n \"b22_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[22]\"]}, \n { \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b22_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[22]\"}, \"&denom\"] } \n },\n \"b22_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[22]\"}, \"&denom\"] } \n },\n \"b22_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[22]\"}, \"&denom\"] } \n },\n \"b22_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[22]\"}, \"&denom\"] } \n },\n \"b22_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[22]\"}, \"&denom\"] }\n },\n \"b22_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[22]\", \"&TCC_EA_RDREQ[22]\"]}, null] } },\n \"b22_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[22]\", \"&TCC_EA_WRREQ[22]\"]}, null] } },\n \"b22_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[22]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[22]\", \"&TCC_EA_ATOMIC[22]\"]}, null]}},\n \"b22_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"}, \"&denom\"] }},\n\n \n\n \"b23_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[23]\"]}, \n { \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b23_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[23]\"}, \"&denom\"] } \n },\n \"b23_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[23]\"}, \"&denom\"] } \n },\n \"b23_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[23]\"}, \"&denom\"] } \n },\n \"b23_atomicReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[23]\"}, \"&denom\"] } \n },\n \"b23_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[23]\"}, \"&denom\"] }\n },\n \"b23_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[23]\", \"&TCC_EA_RDREQ[23]\"]}, null] } },\n \"b23_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[23]\", \"&TCC_EA_WRREQ[23]\"]}, null] } },\n \"b23_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[23]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[23]\", \"&TCC_EA_ATOMIC[23]\"]}, null]}},\n \"b23_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"}, \"&denom\"] }},\n\n \n \"b24_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[24]\"]}, \n { \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b24_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[24]\"}, \"&denom\"] } \n },\n \"b24_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[24]\"}, \"&denom\"] } \n },\n \"b24_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[24]\"}, \"&denom\"] } \n },\n \"b24_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[24]\", \"&TCC_EA_RDREQ[24]\"]}, null] } },\n \"b24_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[24]\", \"&TCC_EA_WRREQ[24]\"]}, null] } },\n \"b24_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[24]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[24]\", \"&TCC_EA_ATOMIC[24]\"]}, null]}},\n \"b24_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n 
\"b24_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"}, \"&denom\"] }},\n\n \n \"b25_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[25]\"]}, \n { \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b25_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[25]\"}, \"&denom\"] } \n },\n \"b25_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[25]\"}, \"&denom\"] } \n },\n \"b25_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[25]\"}, \"&denom\"] } \n },\n \"b25_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[25]\", \"&TCC_EA_RDREQ[25]\"]}, null] } },\n \"b25_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[25]\", \"&TCC_EA_WRREQ[25]\"]}, null] } },\n \"b25_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[25]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[25]\", \"&TCC_EA_ATOMIC[25]\"]}, null]}},\n \"b25_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"}, \"&denom\"] }},\n\n \n \"b26_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[26]\"]}, \n { \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b26_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[26]\"}, \"&denom\"] } \n },\n \"b26_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[26]\"}, \"&denom\"] } \n },\n \"b26_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[26]\"}, \"&denom\"] } \n },\n \"b26_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[26]\", \"&TCC_EA_RDREQ[26]\"]}, null] } },\n \"b26_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[26]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[26]\", \"&TCC_EA_WRREQ[26]\"]}, null] } },\n \"b26_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[26]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[26]\", \"&TCC_EA_ATOMIC[26]\"]}, null]}},\n \"b26_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"}, \"&denom\"] }},\n\n \n \"b27_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[27]\"]}, \n { \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b27_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[27]\"}, \"&denom\"] } \n },\n \"b27_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[27]\"}, \"&denom\"] } \n },\n \"b27_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[27]\"}, \"&denom\"] } \n },\n \"b27_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[27]\", \"&TCC_EA_RDREQ[27]\"]}, null] } },\n \"b27_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[27]\", \"&TCC_EA_WRREQ[27]\"]}, null] } },\n \"b27_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[27]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[27]\", \"&TCC_EA_ATOMIC[27]\"]}, null]}},\n \"b27_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"}, \"&denom\"] }},\n\n \n \"b28_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[28]\"]}, \n { \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b28_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[28]\"}, \"&denom\"] } \n },\n \"b28_readReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[28]\"}, \"&denom\"] } \n },\n \"b28_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[28]\"}, \"&denom\"] } \n },\n \"b28_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[28]\", \"&TCC_EA_RDREQ[28]\"]}, null] } },\n \"b28_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[28]\", \"&TCC_EA_WRREQ[28]\"]}, null] } },\n \"b28_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[28]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[28]\", \"&TCC_EA_ATOMIC[28]\"]}, null]}},\n \"b28_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"}, \"&denom\"] }},\n\n \n \"b29_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[29]\"]}, \n { \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b29_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[29]\"}, \"&denom\"] } \n },\n \"b29_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[29]\"}, \"&denom\"] } \n },\n \"b29_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[29]\"}, \"&denom\"] } \n },\n \"b29_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[29]\"}, \"&denom\"] } \n },\n \"b29_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[29]\"}, \"&denom\"] }\n },\n \"b29_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[29]\", \"&TCC_EA_RDREQ[29]\"]}, null] } },\n \"b29_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[29]\", \"&TCC_EA_WRREQ[29]\"]}, null] } },\n \"b29_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[29]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[29]\", \"&TCC_EA_ATOMIC[29]\"]}, null]}},\n \"b29_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"}, \"&denom\"] }},\n\n \n \"b30_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[30]\"]}, \n { \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b30_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[30]\"}, \"&denom\"] } \n },\n \"b30_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[30]\"}, \"&denom\"] } \n },\n \"b30_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[30]\"}, \"&denom\"] } \n },\n \"b30_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[30]\", \"&TCC_EA_RDREQ[30]\"]}, null] } },\n \"b30_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[30]\", \"&TCC_EA_WRREQ[30]\"]}, null] } },\n \"b30_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[30]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[30]\", \"&TCC_EA_ATOMIC[30]\"]}, null]}},\n 
\"b30_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"}, \"&denom\"] }},\n\n \n \"b31_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[31]\"]}, \n { \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b31_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[31]\"}, \"&denom\"] } \n },\n \"b31_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[31]\"}, \"&denom\"] } \n },\n \"b31_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[31]\"}, \"&denom\"] } \n },\n \"b31_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[31]\"}, \"&denom\"] } \n },\n \"b31_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}, \"&denom\"] }\n },\n \"b31_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[31]\", \"&TCC_EA_RDREQ[31]\"]}, null] } },\n \"b31_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[31]\", \"&TCC_EA_WRREQ[31]\"]}, null] } },\n \"b31_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[31]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[31]\", \"&TCC_EA_ATOMIC[31]\"]}, null]}},\n \"b31_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}, \"&denom\"] }}\n\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"16\",\n \"Hit Rate\": \"&b16_hitRate\",\n \"Req\": \"&b16_req\",\n \"Read Req\": \"&b16_readReq\",\n \"Write Req\": \"&b16_writeReq\",\n \"AtomicReq\": \"&b16_atomicReq\",\n \"EA Read Req\": \"&b16_eaReadReq\",\n \"EA Write Req\": \"&b16_eaWriteReq\",\n \"EA AtomicReq\": \"&b16_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b16_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b16_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b16_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b16_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b16_ea_read_stall_gmi_credit\",\n 
\"EA Read Stall - DRAM\": \"&b16_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b16_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b16_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b16_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b16_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"17\",\n \"Hit Rate\": \"&b17_hitRate\",\n \"Req\": \"&b17_req\",\n \"Read Req\": \"&b17_readReq\",\n \"Write Req\": \"&b17_writeReq\",\n \"AtomicReq\": \"&b17_atomicReq\",\n \"EA Read Req\": \"&b17_eaReadReq\",\n \"EA Write Req\": \"&b17_eaWriteReq\",\n \"EA AtomicReq\": \"&b17_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b17_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b17_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b17_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b17_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b17_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b17_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b17_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b17_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b17_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b17_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"18\",\n \"Hit Rate\": \"&b18_hitRate\",\n \"Req\": \"&b18_req\",\n \"Read Req\": \"&b18_readReq\",\n \"Write Req\": \"&b18_writeReq\",\n \"AtomicReq\": \"&b18_atomicReq\",\n \"EA Read Req\": \"&b18_eaReadReq\",\n \"EA Write Req\": \"&b18_eaWriteReq\",\n \"EA AtomicReq\": \"&b18_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b18_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b18_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b18_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b18_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b18_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b18_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": 
\"&b18_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b18_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b18_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b18_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"19\",\n \"Hit Rate\": \"&b19_hitRate\",\n \"Req\": \"&b19_req\",\n \"Read Req\": \"&b19_readReq\",\n \"Write Req\": \"&b19_writeReq\",\n \"AtomicReq\": \"&b19_atomicReq\",\n \"EA Read Req\": \"&b19_eaReadReq\",\n \"EA Write Req\": \"&b19_eaWriteReq\",\n \"EA AtomicReq\": \"&b19_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b19_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b19_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b19_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b19_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b19_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b19_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b19_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b19_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b19_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b19_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"20\",\n \"Hit Rate\": \"&b20_hitRate\",\n \"Req\": \"&b20_req\",\n \"Read Req\": \"&b20_readReq\",\n \"Write Req\": \"&b20_writeReq\",\n \"AtomicReq\": \"&b20_atomicReq\",\n \"EA Read Req\": \"&b20_eaReadReq\",\n \"EA Write Req\": \"&b20_eaWriteReq\",\n \"EA AtomicReq\": \"&b20_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b20_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b20_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b20_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b20_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b20_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b20_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b20_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b20_ea_write_stall_gmi_credit\",\n \"EA Write Stall - 
DRAM\": \"&b20_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b20_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"21\",\n \"Hit Rate\": \"&b21_hitRate\",\n \"Req\": \"&b21_req\",\n \"Read Req\": \"&b21_readReq\",\n \"Write Req\": \"&b21_writeReq\",\n \"AtomicReq\": \"&b21_atomicReq\",\n \"EA Read Req\": \"&b21_eaReadReq\",\n \"EA Write Req\": \"&b21_eaWriteReq\",\n \"EA AtomicReq\": \"&b21_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b21_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b21_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b21_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b21_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b21_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b21_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b21_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b21_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b21_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b21_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"22\",\n \"Hit Rate\": \"&b22_hitRate\",\n \"Req\": \"&b22_req\",\n \"Read Req\": \"&b22_readReq\",\n \"Write Req\": \"&b22_writeReq\",\n \"AtomicReq\": \"&b22_atomicReq\",\n \"EA Read Req\": \"&b22_eaReadReq\",\n \"EA Write Req\": \"&b22_eaWriteReq\",\n \"EA AtomicReq\": \"&b22_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b22_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b22_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b22_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b22_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b22_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b22_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b22_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b22_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b22_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b22_ea_write_stall_too_many\",\n 
\"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"23\",\n \"Hit Rate\": \"&b23_hitRate\",\n \"Req\": \"&b23_req\",\n \"Read Req\": \"&b23_readReq\",\n \"Write Req\": \"&b23_writeReq\",\n \"AtomicReq\": \"&b23_atomicReq\",\n \"EA Read Req\": \"&b23_eaReadReq\",\n \"EA Write Req\": \"&b23_eaWriteReq\",\n \"EA AtomicReq\": \"&b23_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b23_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b23_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b23_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b23_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b23_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b23_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b23_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b23_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b23_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b23_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"24\",\n \"Hit Rate\": \"&b24_hitRate\",\n \"Req\": \"&b24_req\",\n \"Read Req\": \"&b24_readReq\",\n \"Write Req\": \"&b24_writeReq\",\n \"AtomicReq\": \"&b24_atomicReq\",\n \"EA Read Req\": \"&b24_eaReadReq\",\n \"EA Write Req\": \"&b24_eaWriteReq\",\n \"EA AtomicReq\": \"&b24_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b24_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b24_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b24_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b24_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b24_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b24_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b24_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b24_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b24_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b24_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"25\",\n \"Hit Rate\": \"&b25_hitRate\",\n \"Req\": \"&b25_req\",\n 
\"Read Req\": \"&b25_readReq\",\n \"Write Req\": \"&b25_writeReq\",\n \"AtomicReq\": \"&b25_atomicReq\",\n \"EA Read Req\": \"&b25_eaReadReq\",\n \"EA Write Req\": \"&b25_eaWriteReq\",\n \"EA AtomicReq\": \"&b25_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b25_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b25_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b25_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b25_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b25_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b25_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b25_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b25_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b25_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b25_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"26\",\n \"Hit Rate\": \"&b26_hitRate\",\n \"Req\": \"&b26_req\",\n \"Read Req\": \"&b26_readReq\",\n \"Write Req\": \"&b26_writeReq\",\n \"AtomicReq\": \"&b26_atomicReq\",\n \"EA Read Req\": \"&b26_eaReadReq\",\n \"EA Write Req\": \"&b26_eaWriteReq\",\n \"EA AtomicReq\": \"&b26_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b26_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b26_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b26_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b26_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b26_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b26_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b26_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b26_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b26_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b26_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"27\",\n \"Hit Rate\": \"&b27_hitRate\",\n \"Req\": \"&b27_req\",\n \"Read Req\": \"&b27_readReq\",\n \"Write Req\": \"&b27_writeReq\",\n \"AtomicReq\": \"&b27_atomicReq\",\n \"EA Read 
Req\": \"&b27_eaReadReq\",\n \"EA Write Req\": \"&b27_eaWriteReq\",\n \"EA AtomicReq\": \"&b27_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b27_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b27_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b27_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b27_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b27_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b27_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b27_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b27_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b27_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b27_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"28\",\n \"Hit Rate\": \"&b28_hitRate\",\n \"Req\": \"&b28_req\",\n \"Read Req\": \"&b28_readReq\",\n \"Write Req\": \"&b28_writeReq\",\n \"AtomicReq\": \"&b28_atomicReq\",\n \"EA Read Req\": \"&b28_eaReadReq\",\n \"EA Write Req\": \"&b28_eaWriteReq\",\n \"EA AtomicReq\": \"&b28_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b28_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b28_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b28_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b28_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b28_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b28_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b28_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b28_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b28_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b28_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"29\",\n \"Hit Rate\": \"&b29_hitRate\",\n \"Req\": \"&b29_req\",\n \"Read Req\": \"&b29_readReq\",\n \"Write Req\": \"&b29_writeReq\",\n \"AtomicReq\": \"&b29_atomicReq\",\n \"EA Read Req\": \"&b29_eaReadReq\",\n \"EA Write Req\": \"&b29_eaWriteReq\",\n \"EA AtomicReq\": \"&b29_eaAtomicReq\",\n \"EA Read 
Lat - cycles\": \"&b29_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b29_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b29_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b29_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b29_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b29_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b29_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b29_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b29_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b29_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"30\",\n \"Hit Rate\": \"&b30_hitRate\",\n \"Req\": \"&b30_req\",\n \"Read Req\": \"&b30_readReq\",\n \"Write Req\": \"&b30_writeReq\",\n \"AtomicReq\": \"&b30_atomicReq\",\n \"EA Read Req\": \"&b30_eaReadReq\",\n \"EA Write Req\": \"&b30_eaWriteReq\",\n \"EA AtomicReq\": \"&b30_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b30_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b30_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b30_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b30_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b30_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b30_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b30_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b30_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b30_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b30_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"31\",\n \"Hit Rate\": \"&b31_hitRate\",\n \"Req\": \"&b31_req\",\n \"Read Req\": \"&b31_readReq\",\n \"Write Req\": \"&b31_writeReq\",\n \"AtomicReq\": \"&b31_atomicReq\",\n \"EA Read Req\": \"&b31_eaReadReq\",\n \"EA Write Req\": \"&b31_eaWriteReq\",\n \"EA AtomicReq\": \"&b31_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b31_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b31_eaWriteLat\",\n \"EA Atomic Lat - cycles\": 
\"&b31_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b31_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b31_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b31_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b31_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b31_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b31_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b31_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 16-31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": 
"#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 89 + }, + "id": 93, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 
5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + 
"HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 89 + }, + "id": 94, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 
0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 97 + }, + "id": 187, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 
128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, 
+ "w": 12, + "x": 12, + "y": 97 + }, + "id": 201, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + 
"ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 220, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + 
"Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 105 + }, + "id": 227, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read 
Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + 
"ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 221, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + 
"LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 113 + }, + "id": 228, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 
1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": 
"100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 121 + }, + "id": 222, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": 
"#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 121 + }, + "id": 229, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + 
"FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": 
"70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 129 + }, + "id": 223, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + 
"MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 129 + }, + "id": 230, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + 
"GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - 
GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 137 + }, + "id": 225, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 
0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 137 + }, + "id": 231, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + 
"HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 145 + }, + "id": 
224, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": 
false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 145 + }, + "id": 232, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + 
"LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 153 + }, + "id": 226, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 0 
- 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + 
"ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 153 + }, + "id": 233, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache (per Channel)", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 34, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + "hide": 0, + "includeAll": false, + "label": "Normalization", + "multi": false, + "name": "normUnit", + "options": [ + { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + { + "selected": false, + "text": "\"per Cycle\"", + "value": "\"per Cycle\"" + }, + { + "selected": false, + "text": "\"per Sec\"", + "value": "\"per Sec\"" + }, + { + "selected": false, + "text": "\"per Kernel\"", + "value": "\"per Kernel\"" + } + ], + "query": "\"per Wave\",\n\"per Cycle\",\n\"per Sec\",\n\"per Kernel\"", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": 
false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "L2 Channels", + "multi": false, + "name": "L2Banks", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SEs", + "multi": false, + "name": "numSE", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "104", + "value": "104" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#CUs", + "multi": false, + "name": "numCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": 
"$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SCLK (MHz)", + "multi": false, + "name": "sclk", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SQC", + "multi": false, + "name": "numSQC", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "HBM BW (GB/s)", + "multi": false, + "name": "hbmBW", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "rocprofiler-compute_asw_mixbench_mi200", + "value": "rocprofiler-compute_asw_mixbench_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Workload", + "multi": false, + "name": "Workload1", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "435646", + "value": "435646" + }, + "definition": 
"$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Kernel Cycles", + "multi": false, + "name": "kernelBusyCycles", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "103", + "value": "103" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { 
\"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Active CUs", + "multi": false, + "name": "numActiveCUs", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Filtered Dispatch ID", + "multi": false, + "name": "DispatchIDFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n 
\"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. (1|18)", + "hide": 0, + "label": "Dispatch Filter", + "name": "DispatchID", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "GCD", + "multi": false, + "name": "gpuFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Kernels", + "multi": true, + "name": "KernelNameFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "rocprofiler-compute_asw_mixbench_mi200", + "value": "rocprofiler-compute_asw_mixbench_mi200" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline Workload", + "multi": false, + "name": "Workload2", + 
"options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "103", + "value": "103" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline ActiveCUs", + "multi": false, + "name": "numActiveCUs2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, 
$numCU2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Baseline Dispatch IDs", + "multi": false, + "name": "DispatchIDFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Index\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID2:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. 
(1|18)", + "hide": 0, + "label": "Baseline Dispatch Filter", + "name": "DispatchID2", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline GCD", + "multi": false, + "name": "gpuFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu-id\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Baseline Kernels", + "multi": true, + "name": "KernelNameFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&KernelName\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "System Info" + ], + "value": [ + "System Info" + ] + }, + "hide": 0, + "includeAll": false, + "label": "Comparison Panels", + "multi": true, + "name": "select", + "options": [ + { + "selected": true, + "text": "System Info", + "value": "System Info" + }, + { + "selected": false, + "text": "System Speed-of-Light", + "value": "System Speed-of-Light" + }, + { + "selected": false, + "text": "Roofline", + "value": "Roofline" + }, + { + "selected": false, + "text": "Command Processor", + "value": "Command Processor" + }, + { + "selected": false, + "text": "Shader Processor Input", + "value": "Shader Processor Input" + }, + { + "selected": false, + "text": "Wavefront", + "value": "Wavefront" + }, + { + 
"selected": false, + "text": "Compute Pipeline", + "value": "Compute Pipeline" + }, + { + "selected": false, + "text": "Instruction Mix", + "value": "Instruction Mix" + }, + { + "selected": false, + "text": "Local Data Share", + "value": "Local Data Share" + }, + { + "selected": false, + "text": "Instruction Cache", + "value": "Instruction Cache" + }, + { + "selected": false, + "text": "Scalar L1D Cache", + "value": "Scalar L1D Cache" + }, + { + "selected": false, + "text": "Texture Addr and Data", + "value": "Texture Addr and Data" + }, + { + "selected": false, + "text": "Vector L1D Cache", + "value": "Vector L1D Cache" + }, + { + "selected": false, + "text": "L2 Cache", + "value": "L2 Cache" + } + ], + "query": "System Info, \nSystem Speed-of-Light, \nRoofline,\nCommand Processor, \nShader Processor Input, \nWavefront,\nCompute Pipeline, \nInstruction Mix,\nLocal Data Share, \nInstruction Cache, \nScalar L1D Cache, \nTexture Addr and Data, \nVector L1D Cache,\nL2 Cache", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline L2 Channels", + "multi": false, + "name": "L2Banks2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&L2Banks\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SEs", + "multi": false, + "name": "numSE2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSE\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + 
"sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "104", + "value": "104" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #CUs", + "multi": false, + "name": "numCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline Max Waves/CU", + "multi": false, + "name": "maxWavesPerCU2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&maxWavesPerCU\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline SCLK (MHz)", + "multi": false, + "name": "sclk2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SQC", + "multi": false, + "name": "numSQC2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&numSQC\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + 
{ + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline HBM BW (GB/s)", + "multi": false, + "name": "hbmBW2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbmBW\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "mi200", + "value": "mi200" + }, + "definition": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SOC", + "multi": false, + "name": "soc", + "options": [], + "query": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "5", + "value": "5" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "TopN", + "options": [ + { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "20", + "value": "20" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "query": "1,5,10,15,20,50,100", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2021-11-04T14:21:39.749Z", + "to": "2021-11-08T14:21:39.749Z" + }, + "timepicker": {}, + "timezone": "", + "title": "rocprofiler-compute_v1.0.8_pub", + "uid": "MIPerf_v1_0_0630202211210", + "version": 4, + "weekStart": "" +} diff --git 
a/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v2.0.0_pub.json b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v2.0.0_pub.json new file mode 100644 index 0000000000..77261f04c8 --- /dev/null +++ b/projects/rocprofiler-compute/grafana/dashboards/RocProfCompute_v2.0.0_pub.json @@ -0,0 +1,13415 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 12, + "iteration": 1710183556815, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 217, + "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 23, + "w": 13, + "x": 0, + "y": 1 + }, + "id": 159, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.sysinfo.aggregate([\n {\"$project\": {\n \"_id\": 0,\n \"timestamp\":1,\n \"command\": 1,\n \"hostname\": 1,\n \"cpu_model\": 1,\n \"linux_distro\": 1,\n \"linux_kernel_version\": 1,\n \"rocm_version\": 1,\n 
\"gpu_model\": 1,\n \"gpu_arch\": 1,\n \"se_per_gpu\": 1,\n \"sqc_per_gpu\": 1,\n \"cu_per_gpu\": 1,\n \"simd_per_cu\": 1,\n \"waveSize\": 1,\n \"max_waves_per_cu\": 1,\n \"workgroup_max_size\":1,\n \"gpu_l1\":1,\n \"gpu_l2\":1,\n \"lds_banks_per_cu\": 1,\n \"max_sclk\":1,\n \"max_mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbm_bw\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&timestamp\"\n },\n {\n \"Metric\":\"App Command\",\n \"Value\": \"&command\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&hostname\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&cpu_model\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&linux_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&linux_kernel_version\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&rocm_version\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&gpu_model\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_arch\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&se_per_gpu\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&sqc_per_gpu\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&cu_per_gpu\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&simd_per_cu\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&max_waves_per_cu\"\n },\n {\n \"Metric\":\"Max Workgroup Size\",\n \"Value\":\"&workgroup_max_size\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&gpu_l1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&gpu_l2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&lds_banks_per_cu\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&max_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&max_mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n
\"Value\":\"&hbm_bw\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.sysinfo.aggregate([\n {\"$match\": {\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(System Info)\"}}\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"timestamp\":1,\n \"command\": 1,\n \"hostname\": 1,\n \"cpu_model\": 1,\n \"linux_distro\": 1,\n \"linux_kernel_version\": 1,\n \"rocm_version\": 1,\n \"gpu_model\": 1,\n \"gpu_arch\": 1,\n \"se_per_gpu\": 1,\n \"sqc_per_gpu\": 1,\n \"cu_per_gpu\": 1,\n \"simd_per_cu\": 1,\n \"waveSize\": 1,\n \"max_waves_per_cu\": 1,\n \"workgroup_max_size\":1,\n \"gpu_l1\":1,\n \"gpu_l2\":1,\n \"lds_banks_per_cu\": 1,\n \"max_sclk\":1,\n \"max_mclk\":1,\n \"cur_sclk\": 1,\n \"cur_mclk\":1,\n \"hbm_bw\":1\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\":\"Date\",\n \"Value\": \"&timestamp\"\n },\n {\n \"Metric\":\"App Command\",\n \"Value\": \"&command\"\n },\n {\n \"Metric\":\"Host Name\",\n \"Value\": \"&hostname\"\n },\n {\n \"Metric\":\"Host CPU\",\n \"Value\": \"&cpu_model\"\n },\n {\n \"Metric\":\"Host Distro\",\n \"Value\": \"&linux_distro\"\n },\n {\n \"Metric\":\"Host Kernel\",\n \"Value\": \"&linux_kernel_version\"\n },\n {\n \"Metric\":\"ROCm Version\",\n \"Value\": \"&rocm_version\"\n },\n {\n \"Metric\":\"GFX SoC\",\n \"Value\": \"&gpu_model\"\n },\n {\n \"Metric\":\"GFX ID\",\n \"Value\": \"&gpu_arch\"\n },\n {\n \"Metric\":\"Total SEs\",\n \"Value\":\"&se_per_gpu\"\n },\n {\n \"Metric\":\"Total SQCs\",\n \"Value\":\"&sqc_per_gpu\"\n },\n {\n\n \"Metric\":\"Total CUs\",\n \"Value\":\"&cu_per_gpu\"\n },\n {\n \"Metric\":\"SIMDs/CU\",\n \"Value\": \"&simd_per_cu\"\n },\n {\n \"Metric\":\"Max Wavefronts Occupancy Per CU\",\n \"Value\":\"&max_waves_per_cu\"\n },\n {\n \"Metric\":\"Max
Workgroup Size\",\n \"Value\":\"&workgroup_max_size\"\n },\n {\n \"Metric\":\"L1Cache per CU (KB)\",\n \"Value\":\"&gpu_l1\"\n },\n {\n \"Metric\":\"L2Cache (KB)\",\n \"Value\":\"&gpu_l2\"\n },\n {\n \"Metric\":\"L2Cache Channels\",\n \"Value\":\"&lds_banks_per_cu\"\n },\n {\n \"Metric\":\"Sys Clock (Max) - MHz\",\n \"Value\":\"&max_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Max) - MHz\",\n \"Value\":\"&max_mclk\"\n },\n {\n \"Metric\":\"Sys Clock (Cur) - MHz\",\n \"Value\":\"&cur_sclk\"\n \n },\n {\n \"Metric\":\"Memory Clock (Cur) - MHz\",\n \"Value\":\"&cur_mclk\"\n },\n {\n \"Metric\":\"HBM Bandwidth - GB/s\",\n \"Value\":\"&hbm_bw\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "System Info", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true + }, + "indexByName": {}, + "renameByName": { + "Value 1": "Current", + "Value 2": "Baseline" + } + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Info", + "type": "row" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 108, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "System Speed-of-Light", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "decimals": 0, + "links": [], + "mappings": [ + { + "options": { + "match": "false", + "result": { + "index": 0 + } + }, + "type": "special" + } + ], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Percent of Peak - PoP" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.width", + "value": 252 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit 1" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 137 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 125 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 161 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 15, + "x": 0, + "y": 2 + }, + "id": 110, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { 
\"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } 
},\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu, 4] }] }\n },\n\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]}, \n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]}, \n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]}, \n {\"$multiply\": [$sclk, $cu_per_gpu, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { 
\"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": 
\"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs,\n \"Unit\": \"CUs\",\n \"peak\": $cu_per_gpu,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $cu_per_gpu]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n 
\"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk, $cu_per_gpu, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $sqc_per_gpu]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $sqc_per_gpu]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $sqc_per_gpu]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $sqc_per_gpu]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ 
\"$divide\": [$sclk, 1000] }, 64] }, $cu_per_gpu]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $cu_per_gpu]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n },\n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$max_waves_per_cu, $cu_per_gpu] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": \"&waveOcc_val\",\n \"Unit\": 
\"Wavefronts\",\n \"peak\": { \"$multiply\": [$max_waves_per_cu, $cu_per_gpu] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $sqc_per_gpu]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$sqc_per_gpu, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }}\n\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": 
{\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }]\n }},\n \n \"valu_intOps_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n },\n \n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \n \n \"salu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, 
\"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2] }] }\n },\n \n \"valu_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2] }] }\n },\n \n \"mfma_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2, 4] }] }\n },\n \n \"lds_bconf\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"lds_bw\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu2\"}\n ]}, \n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]}\n },\n \n \"lds_bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu2\"}\n ]}, \n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]}, \n {\"$multiply\": [$sclk2, $cu_per_gpu2, 0.00128]}\n ]}\n },\n \n \"unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \n \"ipcIssue_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"eaWriteLat_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null \n ]\n }\n },\n \"eaReadLat_val\": {\n 
\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null \n ]\n }\n },\n \"eaWriteBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n }, \n \"eaReadBW_val\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n },\n \"l2_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null \n ]\n }\n },\n \"vecl1_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vecl1_BW_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \"l1k_cacheHits_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]}] },\n null\n ]\n }\n },\n \"l1i_hitRate_val\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", 
\"&SQC_ICACHE_MISSES\"]}] }\n },\n \"l1i_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }, 64] }\n },\n \"l1k_BW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }, 64] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"VALU FLOPs\",\n \"Value\": \"&valu_flops_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"VALU IOPs\",\n \"Value\": \"&valu_intOps_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 64, 2 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 64, 2 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (BF16)\",\n \"Value\": \"&mfma_flops_bf16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 512 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 512 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F16)\",\n \"Value\": \"&mfma_flops_f16_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F32)\",\n \"Value\": \"&mfma_flops_f32_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": 
[$sclk2, $cu_per_gpu2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA FLOPs (F64)\",\n \"Value\": \"&mfma_flops_f64_val\",\n \"Unit\": \"GFLOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 256 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 256 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"MFMA IOPs (Int8)\",\n \"Value\": \"&mfma_flops_i8_val\",\n \"Unit\": \"GIOP\",\n \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 1024 ]}, 1000] },\n \"Percent of Peak - PoP\": {\n \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 1024 ]}, 1000] }]\n }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Value\": $numActiveCUs2,\n \"Unit\": \"CUs\",\n \"peak\": $cu_per_gpu2,\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs2] }, $cu_per_gpu2]}\n },\n \n {\n \"Metric\": \"SALU Util\",\n \"Value\": \"&salu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&salu_val\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Value\": \"&valu_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&valu_val\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Value\": \"&mfma_val\",\n \"Unit\": \"pct\",\n \"peak\": 100,\n \"Percent of Peak - PoP\": \"&mfma_val\"\n },\n {\n \"Metric\": \"VALU Active Threads/Wave\",\n \"Value\": \"&unpredthreads_val\",\n \"Unit\": \"Threads\",\n \"peak\": 64,\n \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n },\n {\n \"Metric\": \"IPC - Issue\",\n \"Value\": \"&ipcIssue_val\",\n \"Unit\": \"Instr/cycle\",\n \"peak\": 5,\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": 
[100, \"&ipcIssue_val\"] }, 5] }\n },\n {\n \"Metric\": \"LDS BW\",\n \"Value\": \"&lds_bw\",\n \"Unit\": \"GB/sec\",\n \"peak\": {\"$multiply\": [$sclk2, $cu_per_gpu2, 0.128]},\n \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n },\n {\n \"Metric\": \"LDS Bank Conflict\",\n \"Value\": \"&lds_bconf\",\n \"Unit\": \"Conflicts/access\",\n \"peak\": \"32\",\n \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n },\n {\n \"Metric\": \"Instr Cache Hit Rate\",\n \"Value\": \"&l1i_hitRate_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n }, \n {\n \"Metric\": \"Instr Cache BW\",\n \"Value\": \"&l1i_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $sqc_per_gpu2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $sqc_per_gpu2]}] }\n },\n {\n \"Metric\": \"Scalar L1D Cache Hit Rate\",\n \"Value\": \"&l1k_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n },\n {\n \"Metric\": \"Scalar L1D Cache BW\",\n \"Value\": \"&l1k_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $sqc_per_gpu2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $sqc_per_gpu2]}] }\n },\n\n {\n \"Metric\": \"Vector L1D Cache Hit Rate\",\n \"Value\": \"&vecl1_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n },\n {\n \"Metric\": \"Vector L1D Cache BW\",\n \"Value\": \"&vecl1_BW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $cu_per_gpu2]},\n \"Percent of Peak 
- PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $cu_per_gpu2]}] }\n },\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Value\": \"&l2_cacheHits_val\",\n \"Unit\": \"pct\",\n \"peak\": \"100\",\n \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n }, \n {\n \"Metric\": \"L2-Fabric Read BW\",\n \"Value\": \"&eaReadBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Write BW\",\n \"Value\": \"&eaWriteBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": \"$hbmBW2\",\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW2] }\n },\n {\n \"Metric\": \"L2-Fabric Read Latency\",\n \"Value\": \"&eaReadLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n },\n {\n \"Metric\": \"L2-Fabric Write Latency\",\n \"Value\": \"&eaWriteLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"waveOcc_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n },\n \"waveOcc_pop\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$max_waves_per_cu2, $cu_per_gpu2] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Value\": 
\"&waveOcc_val\",\n \"Unit\": \"Wavefronts\",\n \"peak\": { \"$multiply\": [$max_waves_per_cu2, $cu_per_gpu2] },\n \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n ]\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_IFETCH_LEVEL\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"System Speed-of-Light\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"instrFetchBW_val\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }, 32] }\n },\n \"instrFetchLat_val\": {\n \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Instr Fetch BW\",\n \"Value\": \"&instrFetchBW_val\",\n \"Unit\": \"GB/s\",\n \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }, $sqc_per_gpu2]},\n \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$sqc_per_gpu2, { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 32] }] }] }\n },\n {\n \"Metric\": \"Instr Fetch Latency\",\n \"Value\": \"&instrFetchLat_val\",\n \"Unit\": \"Cycles\",\n \"peak\": \"\",\n \"Percent of Peak - PoP\": \"\"\n\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]\n }}\n\n ]);", + "type": "table" + } + ], + "title": "Speed of Light", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Metric 1": 0, + "Metric 2": 7, + "Percent of Peak 
- PoP 1": 5, + "Percent of Peak - PoP 2": 6, + "Unit 1": 8, + "Unit 2": 9, + "Value 1": 1, + "Value 2": 2, + "peak 1": 3, + "peak 2": 4 + }, + "renameByName": { + "Percent of Peak - PoP": "Pct-of-Peak", + "Percent of Peak - PoP 1": "Pct-of-Peak (Current)", + "Percent of Peak - PoP 2": "Pct-of-Peak (Baseline)", + "Unit": "", + "Value": "Avg", + "Value 1": "Avg (Current)", + "Value 2": "Avg (Baseline)", + "peak": "Theoretical Max", + "peak 1": "Theoretical Max (Current)", + "peak 2": "Theoretical Max (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 175, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Dispatch_ID\",\n \"Kernel Name\": \"&Kernel_Name\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n],\n{ allowDiskUse: true }\n);", + "type": "table" + } + ], + "title": "Dispatch IDs - Current", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + }, + { 
+ "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dispatch ID" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + } + ] + }, + "gridPos": { + "h": 29, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 215, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Dispatch ID\": \"&Dispatch_ID\",\n \"Kernel Name\": \"&Kernel_Name\"\n }},\n {\"$sort\": {\n \"Dispatch ID\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Dispatch IDs - Baseline", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "_id": "Dispatch ID" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 36, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Kernel Statistics", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, 
+ "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 157, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + } + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "u5Z2zJhnk" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"elapsedTime1\": {\n \"$divide\": [{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}, 1000]\n }\n }},\n\n {\"$project\": {\n \"_id\": 0,\n \"elapsedTime1\": 1\n }}\n]);", + "type": "table" + } + ], + "title": "Kernel Time Histogram", + "transparent": true, + "type": "histogram" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + 
"properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 123 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU 
FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Performance" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Peak FLOPs" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 213, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "L1 Cache (Bytes)" + } + ] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&Kernel_Name\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n 
{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128 ]} \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]\n\n }\n }\n\n }},\n 
\n {\"$sort\": { \"TotalDuration\": -1 }},\n \n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n }}\n]);", + "type": "table" + } + ], + "title": "Top Kernels", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Name", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": "HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "peak_flops": "Peak FLOPs", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] 
+ }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TotalDuration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Duration" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + }, + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS " + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L1 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Cache" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.width", + "value": 87 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Calls" + }, + "properties": [ + { + "id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Duration" + }, + "properties": [ + { + "id": "custom.width", + "value": 153 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 143 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (BF16)" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F32)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { 
+ "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "MFMA FLOPs (F64)" + }, + "properties": [ + { + "id": "custom.width", + "value": 146 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Throughput" + }, + "properties": [ + { + "id": "unit", + "value": "gflops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VALU FLOPs" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (HBM)" + }, + "properties": [ + { + "id": "custom.width", + "value": 89 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L2 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "AI (L1 Cache)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "LDS (Bytes)" + }, + "properties": [ + { + "id": "custom.width", + "value": 98 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HBM" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + }, + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dispatch" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 251, + "interval": "2h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "HV80ot2nz" + 
}, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n { \"$group\": { \n \"_id\": \"&Dispatch_ID\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n\n \"Throughput\": {\n \"$avg\": { \"$divide\": [\n \n {\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]},\n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]}\n },\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, 
\"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"LDS_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 128]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": 
[\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n },\n\n \"hbm_bw\": {\n \"$avg\": {\n \"$divide\": [\n {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n },\n {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]\n\n }\n }\n\n }},\n \n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": $TopN },\n\n {\"$addFields\": {\n \"ai_L1\": { \"$cond\": [\n {\"$ne\": [\"&L1cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"]},\n \"\"\n ]},\n \"ai_L2\": { \"$cond\": [\n {\"$ne\": [\"&L2cache_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"]},\n \"\"\n ]},\n \"ai_hbm\": { \"$cond\": [\n {\"$ne\": [\"&hbm_data\", 0]},\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"]},\n \"\"\n ]}\n\n }}\n]);", + "type": "table" + } + ], + "title": "Top Dispatches", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "AvgDuration": 5, + "Calls": 1, + "L1cache_data": 16, + "L2cache_data": 17, + "LDS_data": 15, + "Throughput": 2, + "TotalDuration": 4, + "_id": 0, + "ai_L1": 6, + "ai_L2": 7, + "ai_hbm": 8, + "hbm_bw": 3, + "hbm_data": 18, + "mfma_flops_bf16": 12, + "mfma_flops_f16": 11, + "mfma_flops_f32": 13, + "mfma_flops_f64": 14, + "peak_flops": 19, + "total_flops": 9, + "valu_flops": 10 + }, + "renameByName": { + "AvgDuration": "Avg Duration", + "Calls": "", + "L1cache_data": "Vector L1D Cache", + "L2cache_data": "L2 Cache", + "LDS_data": "LDS ", + "Throughput": "Performance", + "TotalDuration": "Total Duration", + "_id": "Dispatch", + "ai_L1": "AI (Vector L1D Cache)", + "ai_L2": "AI (L2 Cache)", + "ai_hbm": "AI (HBM)", + "hbm_bw": 
"HBM BW ", + "hbm_data": "HBM", + "mfma_flops_bf16": "MFMA FLOPs (BF16)", + "mfma_flops_f16": "MFMA FLOPs (F16)", + "mfma_flops_f32": "MFMA FLOPs (F32)", + "mfma_flops_f64": "MFMA FLOPs (F64)", + "total_flops": "Total FLOPs", + "valu_flops": "VALU FLOPs" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 65 + }, + "id": 40, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Memory Chart Analysis", + "type": "row" + }, + { + "description": "All transaction units default to Billion, when per-sec norm is used", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 285, + "options": { + "addAllIDs": false, + "captureMappings": false, + "eventAutoComplete": true, + "eventSource": "options.animateLogo(svgmap, data);\r\nconsole.log(\"Starting render\");\r\nlet buff = data.series[0].fields[2].values.buffer;\r\nlet valueCount = buff.length;\r\nconsole.log(\"The buff is \", valueCount, \" 
long\");\r\n\r\nsvgmap.wave_life_.text(buff[0]);\r\nsvgmap.active_cu_.text(buff[1]);\r\nsvgmap.salu_.text(buff[2]);\r\nsvgmap.smem_.text(buff[3]);\r\nsvgmap.valu_.text(buff[4]);\r\nsvgmap.mfma_.text(buff[5]);\r\nsvgmap.vmem_.text(buff[6]);\r\nsvgmap.lds_.text(buff[7]);\r\nsvgmap.gws_.text(buff[8]);\r\nsvgmap.br_.text(buff[9]);\r\nsvgmap.vgpr_.text(buff[10]);\r\nsvgmap.sgpr_.text(buff[11]);\r\nsvgmap.lds_alloc_.text(buff[12]);\r\nsvgmap.scratch_alloc_.text(buff[13]);\r\nsvgmap.wavefronts_.text(buff[14]);\r\nsvgmap.workgroups_.text(buff[15]);\r\nsvgmap.lds_req_.text(buff[16]);\r\nsvgmap.il1_fetch_.text(buff[17]);\r\nsvgmap.il1_hit_.text(buff[18]);\r\nsvgmap.il1_l2_rd_.text(buff[19]);\r\nsvgmap.sl1_rd_.text(buff[20]);\r\nsvgmap.sl1_hit_.text(buff[21]);\r\nsvgmap.sl1_l2_rd_.text(buff[22]);\r\nsvgmap.sl1_l2_wr_.text(buff[23]);\r\nsvgmap.sl1_l2_atom_.text(buff[24]);\r\nsvgmap.vl1_rd_.text(buff[25]);\r\nsvgmap.vl1_wr_.text(buff[26]);\r\nsvgmap.vl1_atom_.text(buff[27]);\r\nsvgmap.vl1_hit_.text(buff[28]);\r\nsvgmap.vl1_lat_.text(buff[29]);\r\nsvgmap.vl1_l2_rd_.text(buff[30]);\r\nsvgmap.vl1_l2_wr_.text(buff[31]);\r\nsvgmap.vl1_l2_atom_.text(buff[32]);\r\nsvgmap.l2_rd_.text(buff[33]);\r\nsvgmap.l2_wr_.text(buff[34])\r\nsvgmap.l2_atom_.text(buff[35]);\r\nsvgmap.l2_hit_.text(buff[36]);\r\nsvgmap.l2_rd_lat_.text(buff[37]);\r\nsvgmap.l2_wr_lat_.text(buff[38]);\r\nsvgmap.fabric_rd_lat_.text(buff[39]);\r\nsvgmap.fabric_wr_lat_.text(buff[40]);\r\nsvgmap.fabric_atom_lat_.text(buff[41]);\r\nsvgmap.l2_fabric_rd_.text(buff[42]);\r\nsvgmap.l2_fabric_wr_.text(buff[43]);\r\nsvgmap.l2_fabric_atom_.text(buff[44]);\r\nsvgmap.hbm_rd_.text(buff[45]);\r\nsvgmap.hbm_wr_.text(buff[46]);\r\nsvgmap.lds_util_.text(buff[47]);\r\nsvgmap.vl1_coales_.text(buff[48]);\r\nsvgmap.vl1_stall_.text(buff[49]);\r\nsvgmap.wave_occ_.text(buff[50]);\r\nsvgmap.lds_lat_.text(buff[51]);\r\nsvgmap.il1_lat_.text(buff[52]);\r\nsvgmap.sl1_lat_.text(buff[53]);\r\nsvgmap.gds_req_.text(buff[54]);", + "initAutoComplete": true, 
+ "initSource": "options.animateLogo = (svgmap, data) => {\r\n \r\n}\r\n ", + "svgMappings": [ + { + "mappedName": "wave_life_", + "svgId": "wave_life" + }, + { + "mappedName": "wave_occ_", + "svgId": "wave_occ" + }, + { + "mappedName": "salu_", + "svgId": "salu" + }, + { + "mappedName": "smem_", + "svgId": "smem" + }, + { + "mappedName": "valu_", + "svgId": "valu" + }, + { + "mappedName": "mfma_", + "svgId": "mfma" + }, + { + "mappedName": "vmem_", + "svgId": "vmem" + }, + { + "mappedName": "lds_", + "svgId": "lds" + }, + { + "mappedName": "gws_", + "svgId": "gws" + }, + { + "mappedName": "br_", + "svgId": "br" + }, + { + "mappedName": "active_cu_", + "svgId": "active_cu" + }, + { + "mappedName": "vgpr_", + "svgId": "vgpr" + }, + { + "mappedName": "sgpr_", + "svgId": "sgpr" + }, + { + "mappedName": "lds_alloc_", + "svgId": "lds_alloc" + }, + { + "mappedName": "scratch_alloc_", + "svgId": "scratch_alloc" + }, + { + "mappedName": "wavefronts_", + "svgId": "wavefronts" + }, + { + "mappedName": "workgroups_", + "svgId": "workgroups" + }, + { + "mappedName": "lds_req_", + "svgId": "lds_req" + }, + { + "mappedName": "vl1_wr_", + "svgId": "vl1_wr" + }, + { + "mappedName": "vl1_atom_", + "svgId": "vl1_atom" + }, + { + "mappedName": "sl1_rd_", + "svgId": "sl1_rd" + }, + { + "mappedName": "il1_fetch_", + "svgId": "il1_fetch" + }, + { + "mappedName": "lds_lat_", + "svgId": "lds_lat" + }, + { + "mappedName": "lds_bw_", + "svgId": "lds_bw" + }, + { + "mappedName": "lds_util_", + "svgId": "lds_util" + }, + { + "mappedName": "vl1_hit_", + "svgId": "vl1_hit" + }, + { + "mappedName": "vl1_lat_", + "svgId": "vl1_lat" + }, + { + "mappedName": "vl1_coales_", + "svgId": "vl1_coales" + }, + { + "mappedName": "vl1_stall_", + "svgId": "vl1_stall" + }, + { + "mappedName": "sl1_hit_", + "svgId": "sl1_hit" + }, + { + "mappedName": "sl1_lat_", + "svgId": "sl1_lat" + }, + { + "mappedName": "il1_hit_", + "svgId": "il1_hit" + }, + { + "mappedName": "il1_lat_", + "svgId": "il1_lat" + }, + { + 
"mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "il1_l2_rd_", + "svgId": "il1_l2_rd" + }, + { + "mappedName": "sl1_l2_rd_", + "svgId": "sl1_l2_rd" + }, + { + "mappedName": "sl1_l2_wr_", + "svgId": "sl1_l2_wr" + }, + { + "mappedName": "sl1_l2_atom_", + "svgId": "sl1_l2_atom" + }, + { + "mappedName": "l2_rd_", + "svgId": "l2_rd" + }, + { + "mappedName": "l2_wr_", + "svgId": "l2_wr" + }, + { + "mappedName": "l2_atom_", + "svgId": "l2_atom" + }, + { + "mappedName": "l2_hit_", + "svgId": "l2_hit" + }, + { + "mappedName": "l2_rd_lat_", + "svgId": "l2_rd_lat" + }, + { + "mappedName": "l2_wr_lat_", + "svgId": "l2_wr_lat" + }, + { + "mappedName": "l2_fabric_rd_", + "svgId": "l2_fabric_rd" + }, + { + "mappedName": "l2_fabric_wr_", + "svgId": "l2_fabric_wr" + }, + { + "mappedName": "l2_fabric_atom_", + "svgId": "l2_fabric_atom" + }, + { + "mappedName": "fabric_rd_lat_", + "svgId": "fabric_rd_lat" + }, + { + "mappedName": "fabric_wr_lat_", + "svgId": "fabric_wr_lat" + }, + { + "mappedName": "fabric_atom_lat_", + "svgId": "fabric_atom_lat" + }, + { + "mappedName": "fabric_hbm_rd_", + "svgId": "fabric_hbm_rd" + }, + { + "mappedName": "fabric_hbm_wr_", + "svgId": "fabric_hbm_wr" + }, + { + "mappedName": "vl1_rd_", + "svgId": "vl1_rd" + }, + { + "mappedName": "vl1_l2_rd_", + "svgId": "vl1_l2_rd" + }, + { + "mappedName": "vl1_l2_wr_", + "svgId": "vl1_l2_wr" + }, + { + "mappedName": "vl1_l2_atom_", + "svgId": "vl1_l2_atom" + }, + { + "mappedName": "hbm_rd_", + "svgId": "hbm_rd" + }, + { + "mappedName": "hbm_wr_", + "svgId": "hbm_wr" + } + ], + "svgSource": "\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n Wave Occupancy\r\n \r\n Wave Life\r\n \r\n \r\n \r\n xGMI /\r\n PCIe\r\n \r\n GMI\r\n \r\n HBM\r\n \r\n Fabric\r\n \r\n \r\n SALU:\r\n 00000\r\n \r\n \r\n SMEM:\r\n 00000\r\n \r\n \r\n VALU:\r\n 00000\r\n \r\n \r\n MFMA:\r\n 00000\r\n \r\n \r\n VMEM:\r\n 00000\r\n \r\n \r\n LDS:\r\n 00000\r\n \r\n \r\n GWS:\r\n 00000\r\n \r\n \r\n Br:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n \r\n \r\n cycles\r\n Atomic:\r\n 00000\r\n \r\n \r\n Rd:\r\n 00000\r\n \r\n \r\n Wr:\r\n \r\n \r\n 00000\r\n \r\n \r\n Atomic:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n 00000\r\n Rd:\r\n 00000\r\n Wr:\r\n 00000\r\n Req:\r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n per-GCD\r\n cycles\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Rd:\r\n 00000\r\n \r\n \r\n cycles\r\n Wr:\r\n 00000\r\n Wave 0 Instr buff\r\n Wave N-1 Instr buff\r\n Active CUs\r\n \r\n \r\n %\r\n Hit:\r\n 00000\r\n \r\n \r\n cycles\r\n Lat:\r\n 00000\r\n \r\n \r\n %\r\n Util:\r\n 00000\r\n \r\n \r\n %\r\n Coales:\r\n 00000\r\n Exec\r\n Instr Buff\r\n Instr Dispatch\r\n LDS\r\n Vector L1 Cache\r\n Scalar L1D Cache\r\n Instr L1 Cache\r\n L2 Cache\r\n 00000\r\n Req:\r\n \r\n \r\n %\r\n Stall:\r\n 00000\r\n 00000\r\n Fetch:\r\n 0000000\r\n 00000\r\n 000/000\r\n \r\n Latency\r\n \r\n LDS Alloc:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n \r\n Scratch Alloc:\r\n \r\n 00000\r\n \r\n Wavefronts:\r\n \r\n 00000\r\n \r\n Workgroups:\r\n \r\n 00000\r\n \r\n VGPRs:\r\n \r\n 00000\r\n \r\n SGPRs:\r\n \r\n 00000\r\n \r\n \r\n 00000\r\n Rd:\r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 00000\r\n 00000\r\n 00000\r\n Rd:\r\n Wr:\r\n Atomic:\r\n \r\n \r\n \r\n Latency\r\n \r\n \r\n \r\n \r\n Text is not SVG - cannot display\r\n \r\n \r\n" + }, + "pluginVersion": "8.4.0", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n 
},\n \"br\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&Arch_VGPR\"\n },\n \"sgpr\": {\n \"$avg\": \"&SGPR\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&LDS_Per_Workgroup\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&Scratch_Per_Workitem\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": 
[\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n 
]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$cu_per_gpu\"]}\n },\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": 
\"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n \"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": \"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n 
\"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": \"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { \"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n 
\"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_LEVEL_WAVES", + "target": "$Workload1.SQ_LEVEL_WAVES.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_occ\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\",\"&GRBM_GUI_ACTIVE\"] }, $numActiveCUs]}\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Occupancy\",\n \"Alias\": \"wave_occ_\",\n \"Value\":{ \"$round\": [\"&wave_occ\", 0] }\n }\n ]\n }},\n 
{\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "$Workload1.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"lds_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&SQ_INSTS_LDS\", 0] },\n { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\"] },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"LDS Lat\",\n \"Alias\": \"lds_lat_\",\n \"Value\":{ \"$round\": [\"&lds_lat\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_ICACHE_INFLIGHT", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Dispatch_ID\",\n\t\t\"foreignField\": \"Dispatch_ID\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": 
\"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"il1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_ICACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_ICACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"IL1 Lat\",\n \t\t\t\"Alias\": \"il1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&il1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQC_DCACHE_INFLIGHT_LEVEL", + "target": "$Workload1.pmc_perf.aggregate([\n\t{\"$match\": {\n\t\t\"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n \t}},\n\t{\"$lookup\": {\n\t\t\"from\": \"SQ_IFETCH_LEVEL\",\n\t\t\"localField\": \"Dispatch_ID\",\n\t\t\"foreignField\": \"Dispatch_ID\",\n\t\t\"as\": \"SQ_IFETCH_LEVEL\",\n\t\t\"pipeline\": [\n\t\t\t{\"$match\": {\n\t\t\t\t\"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \t\t\t\t\"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \t\t\t\t\"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n \t\t\t}},\n\t\t\t{\"$project\":{\n\t\t\t\t\"_id\": 0,\n\t\t\t\t\"SQ_ACCUM_PREV_HIRES\": 1\n\t\t\t}}\n\t\t]\n\t}},\n\t{\"$unwind\":{\n\t\t\"path\": \"&SQ_IFETCH_LEVEL\"\n\t}},\n\t{\"$group\":{\n\t\t\"_id\": null,\n\t\t\"sl1_lat\": {\n\t\t\t\"$avg\":{\n\t\t\t\t\"$cond\":[\n\t\t\t\t\t\t{\"$ne\":[\"&SQC_DCACHE_REQ\",0]},\n\t\t\t\t\t\t{\"$divide\":[\"&SQ_IFETCH_LEVEL.SQ_ACCUM_PREV_HIRES\",\"&SQC_DCACHE_REQ\"]},\n\t\t\t\t\t\tnull\n\t\t\t\t\t]\n\t\t\t}\n\t\t} \n\t}},\n\t{\"$set\": {\n \t\t\"array\": [\n \t\t{\n \t\t\t\"Metric\": \"vL1D Lat\",\n 
\t\t\t\"Alias\": \"sl1_lat_\",\n \t\t\t\"Value\": { \"$round\": [\"&sl1_lat\", 0] }\n \t\t}\n \t\t]\n\t}},\n \t{\"$unwind\": {\n \t\t\"path\": \"&array\"\n \t}},\n \t{\"$replaceRoot\": {\n \t\t\"newRoot\": \"&array\"\n \t}}\n]);", + "type": "table" + } + ], + "title": "Memory Chart (Normalization: $normUnit)", + "transformations": [ + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "string", + "targetField": "Value" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "amd-custom-svg" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 79 + }, + "id": 241, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Roofline Analysis", + "type": "row" + }, + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 80 + }, + "id": 253, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm" + }, + "name": "HBM-VALU", + "settings": { + "color_option": "ramp", + "line": { + 
"color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2" + }, + "name": "L2-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1" + }, + "name": "vL1D-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS" + }, + "name": "LDS-VALU", + "settings": { + "color_option": "ramp", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#33B5E5", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 15, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": 
"circle" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": 
"#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA" + }, + "name": "HBM-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", 
+ "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA" + }, + "name": "L2-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA" + }, + "name": "vL1D-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA" + }, + "name": "LDS-MFMA", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "rawQuery": true, + "refId": 
"HBM-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&hbm_bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n 
}}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-VALU", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n 
\"else\": \"&high_flop\"\n }\n },\n\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&FP32Flops\", \"&FP64Flops\"]\n },\n \"then\": \"&FP64Flops\",\n \"else\": \"&FP32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&Kernel_Name\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n \"AvgDuration\": { \"$avg\": 
{\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", 
\"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&Kernel_Name\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n 
\"_id\": \"&Kernel_Name\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n 
\"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&Kernel_Name\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": 
{\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { 
\"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [\"&MFMAF32Flops\", \"&MFMAF64Flops\"]\n },\n \"then\": \"&MFMAF64Flops\",\n \"else\": \"&MFMAF32Flops\"\n }\n }\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP32/FP64 (MI200)", + "type": "natel-plotly-panel", + "version": 1 + }, + { + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 108 + }, + "id": 312, + "pconfig": { + "fixScale": "", + "layout": { + "dragmode": "zoom", + "font": { + "family": "\"Open Sans\", Helvetica, Arial, sans-serif" + }, + "hovermode": "closest", + "legend": { + "orientation": "v" + }, + "showlegend": true, + "xaxis": { + "range": [ + -2, + 3.8 + ], + "rangemode": "between", + "showgrid": true, + "title": "Arithmetic Intensity (FLOP/Byte)", + "type": "log", + "zeroline": false + }, + "yaxis": { + "rangemode": "normal", + "showgrid": true, + "title": "Performance (GFLOP/sec)", + "type": "log", + "zeroline": false + }, + "zaxis": { + "rangemode": "normal", + "showgrid": true, + "type": "linear", + "zeroline": false + } + }, + "loadFromCDN": false, + "settings": { + "displayModeBar": false, + "type": "scatter" + }, + "showAnnotations": true, + "traces": [ + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "cur_ai_hbm", + "y": "cur_perf" + }, + "name": "Cur - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": 
"solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "cur_ai_L2", + "y": "cur_perf" + }, + "name": "Cur - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "cur_ai_L1", + "y": "cur_perf" + }, + "name": "Cur - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "base_ai_hbm", + "y": "base_perf" + }, + "name": "Baseline - HBM", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#F2495C", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "square-open" + } 
+ }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "base_ai_L2", + "y": "base_perf" + }, + "name": "Baseline - L2", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#5794F2", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "diamond-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "base_ai_L1", + "y": "base_perf" + }, + "name": "Baseline - vL1D", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": false, + "markers": true + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_fp16" + }, + "name": "HBM-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_fp16" + }, + "name": "L2-MFMA-FP16", + "settings": { + "color_option": 
"solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_fp16" + }, + "name": "vL1D-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_fp16" + }, + "name": "LDS-MFMA-FP16", + "settings": { + "color_option": "solid", + "line": { + "color": "#005f81", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "hbmBw_peak", + "x": "xrange", + "y": "roofline_hbm_MFMA_i8" + }, + "name": "HBM-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + 
"size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L2Bw_peak", + "x": "xrange", + "y": "roofline_L2_MFMA_i8" + }, + "name": "L2-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "L1Bw_peak", + "x": "xrange", + "y": "roofline_L1_MFMA_i8" + }, + "name": "vL1D-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + }, + { + "mapping": { + "color": "n", + "text": "LDSBw_peak", + "x": "xrange", + "y": "roofline_LDS_MFMA_i8" + }, + "name": "LDS-MFMA-I8", + "settings": { + "color_option": "solid", + "line": { + "color": "#FA6400", + "dash": "solid", + "shape": "linear", + "width": 2 + }, + "marker": { + "color": "#B877D9", + "colorscale": "YlOrRd", + "line": { + "color": "#DDD", + "width": 0 + }, + "showscale": false, + "size": 12, + "sizemin": 3, + "sizemode": "diameter", + "sizeref": 0.2, + "symbol": "star-open" + } + }, + "show": { + "line": true, + "lines": true, + "markers": false + } + } + ] + }, + "targets": [ + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"hbmBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&hbm_bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "HBM-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_hbm_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&hbm_bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + 
"rawQuery": true, + "refId": "Cur Workload", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n { \"$group\": { \n \"_id\": \"&Kernel_Name\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n 
\"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&Kernel_Name\",\n \"cur_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"cur_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"cur_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n },\n \"cur_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "Baseline Workload", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Roofline)\"}}\n }},\n { \"$group\": { \n \"_id\": \"&Kernel_Name\", \n \"Calls\": { \"$sum\": 1} ,\n \"TotalDuration\": { \"$sum\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n \"AvgDuration\": { \"$avg\": {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]}},\n\n \"total_flops\": {\n \"$avg\":{\"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] },\n { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }\n ]}\n },\n\n \"valu_flops\": {\n \"$avg\": { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }, \"&SQ_INSTS_VALU_TRANS_F16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }, \"&SQ_INSTS_VALU_TRANS_F32\"] }] },\n { 
\"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }, \"&SQ_INSTS_VALU_TRANS_F64\"] }] }\n ]}\n },\n \n \"mfma_flops_f16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] } },\n \"mfma_flops_bf16\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] } },\n \"mfma_flops_f32\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] } },\n \"mfma_flops_f64\": { \"$avg\": { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] } },\n\n\n \"lds_data\": {\n \"$avg\": { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]\n } \n },\n\n \"L1cache_data\": {\n \"$avg\": { \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64] } \n },\n\n \"L2cache_data\": {\n \"$avg\": {\n \"$multiply\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \n \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 64]}\n },\n\n \"hbm_data\": {\n \"$avg\": {\n \"$add\": [ { \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] },\n { \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]},\n { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }\n ]\n }\n }\n\n\n }},\n {\"$sort\": { \"TotalDuration\": -1 }},\n { \"$limit\": 10 },\n {\"$project\": {\n \"_id\": 0,\n \"Kernel\": \"&Kernel_Name\",\n \"base_ai_L1\": {\n \"$cond\": [\n { \"$ne\": [\"&L1cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L1cache_data\"] },\n 0\n ]\n },\n \"base_ai_L2\": {\n \"$cond\": [\n { \"$ne\": [\"&L2cache_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&L2cache_data\"] },\n 0\n ]\n },\n \"base_ai_hbm\": {\n \"$cond\": [\n { \"$ne\": [\"&hbm_data\", 0] },\n {\"$divide\": [\"&total_flops\", \"&hbm_data\"] },\n 0\n ]\n 
},\n \"base_perf\": { \n \"$cond\": [\n { \"$ne\": [\"&AvgDuration\", 0] },\n { \"$divide\": [\"&total_flops\", \"&AvgDuration\"] },\n 0\n ]\n }\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L2Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L2Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L2-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L2_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L2Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L2Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n 
},\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"L1Bw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&L1Bw\", 1] },0, 32]}, \" GB/s\"] }\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "LJKvMZank" + }, + "hide": false, + "rawQuery": true, + "refId": "L1-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_L1_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&L1Bw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&L1Bw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + 
"datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-FP16", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAF16Flops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_fp16\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n },\n \"LDSBw_peak\": { \"$concat\": [{\"$substr\":[{ \"$round\":[\"&LDSBw\", 1] },0, 32]}, \" GB/s\"] }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "jYrBWHfnz" + }, + "hide": false, + "rawQuery": true, + "refId": "LDS-MFMA-I8", + "target": "${Workload1}.roofline.aggregate([\n {\"$match\": {\n \"device\": { \"$in\": [${gpuFilter:raw}] }\n }},\n { \"$addFields\": {\n \"xaxis\": {\"$range\": [0, 50000]}\n }},\n { \"$addFields\": {\n \"high_flop\": \"&MFMAI8Ops\"\n }},\n { \"$addFields\": {\n \"cte\": {\n \"$map\": {\n \"input\": \"$xaxis\",\n \"as\": \"n\",\n \"in\": {\n \"xrange\": {\"$multiply\": [0.1, \"$$n\"]},\n \"roofline_LDS_MFMA_i8\": {\n \"$cond\": {\n \"if\": {\n \"$lt\": [ {\"$multiply\": [0.1, \"$$n\", \"&LDSBw\"]}, \"$high_flop\" ]\n },\n \"then\": { \"$multiply\": [0.1, \"$$n\", \"&LDSBw\"] },\n \"else\": \"$high_flop\"\n }\n }\n\n } \n }}\n }},\n \n {\"$unwind\": \"$cte\"\n },\n {\"$replaceRoot\": {\n \"newRoot\": \"$cte\"\n }}\n]);", + "type": "table" + } + ], + "title": "Empirical Roofline FP16/INT8 (MI200)", + "type": "natel-plotly-panel", + 
"version": 1 + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 136 + }, + "id": 2, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Command Processor (CPC/CPF)", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 13, + "x": 0, + "y": 137 + }, + "id": 6, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": 
{\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": 
\"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": 
\"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, 
\"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1 \n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpfBusy_avg\": {\"$avg\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_min\": {\"$min\": \"&CPF_CPF_STAT_BUSY\"},\n \"cpfBusy_max\": {\"$max\": \"&CPF_CPF_STAT_BUSY\"},\n\n \"cpfUtil_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n \"cpfUtil_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }, 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_BUSY\"] }, { \"$add\": [\"&CPF_CPF_STAT_BUSY\", \"&CPF_CPF_STAT_IDLE\"] }]},\n null\n ]\n }},\n\n \"cpfStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n \"cpfStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_STAT_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, 
\"&CPF_CPF_STAT_STALL\"] }, \"&CPF_CPF_STAT_BUSY\"] } ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\": {\"$avg\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_min\": {\"$min\": \"&CPF_CPF_TCIU_BUSY\"},\n \"l2CacheBusy_max\": {\"$max\": \"&CPF_CPF_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_BUSY\"] }, { \"$add\": [\"&CPF_CPF_TCIU_BUSY\", \"&CPF_CPF_TCIU_IDLE\"] }]},\n null\n ]\n }},\n \"l2CacheStall_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n \"l2CacheStall_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPF_CPF_TCIU_BUSY\", 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&CPF_CPF_TCIU_STALL\"] }, \"&CPF_CPF_TCIU_BUSY\"] },\n null\n ]\n }},\n\n \"utcL1Stall_avg\": {\"$avg\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\": {\"$min\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\": {\"$max\": \"&CPF_CMP_UTCL1_STALL_ON_TRANSLATION\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n 
\"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Busy\",\n \"Avg\": \"&cpfBusy_avg\",\n \"Min\": \"&cpfBusy_min\",\n \"Max\": \"&cpfBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"CPF Util\",\n \"Avg\": \"&cpfUtil_avg\",\n \"Min\": \"&cpfUtil_min\",\n \"Max\": \"&cpfUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPF Stall\",\n \"Avg\": \"&cpfStall_avg\",\n \"Min\": \"&cpfStall_min\",\n \"Max\": \"&cpfStall_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Busy\",\n \"Avg\": \"&l2CacheBusy_avg\",\n \"Min\": \"&l2CacheBusy_min\",\n \"Max\": \"&l2CacheBusy_max\", \n \"Unit\": \"Cycles/Kernel\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Stall\",\n \"Avg\": \"&l2CacheStall_avg\",\n \"Min\": \"&l2CacheStall_min\",\n \"Max\": \"&l2CacheStall_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles/Kernel\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Command Processor Fetcher", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min 
(Baseline)" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 171 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 147 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 11, + "x": 13, + "y": 137 + }, + "id": 4, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n 
{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n 
\"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ 
\"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": 
\"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": 
"${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Command Processor)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"gpuBusy_avg\": {\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\": {\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\": {\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"cpcBusy_avg\": {\"$avg\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_min\": {\"$min\": \"&CPC_CPC_STAT_BUSY\"},\n \"cpcBusy_max\": {\"$max\": \"&CPC_CPC_STAT_BUSY\"},\n\n \"cpcUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n \"cpcUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_BUSY\"] }, { \"$add\": [\"&CPC_CPC_STAT_BUSY\", \"&CPC_CPC_STAT_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"cpcStall_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStall_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallCycles_avg\": {\"$avg\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_min\": {\"$min\": \"&CPC_CPC_STAT_STALL\"},\n \"cpcStallCycles_max\": {\"$max\": \"&CPC_CPC_STAT_STALL\"},\n\n \"cpcStallRate_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ 
\"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"cpcStallRate_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_STAT_STALL\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"cpcPacketDecoding_avg\":{\"$avg\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_min\":{\"$min\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n \"cpcPacketDecoding_max\":{\"$max\": \"&CPC_ME1_BUSY_FOR_PACKET_DECODE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&CPC_ME1_DC0_SPI_BUSY\"},\n\n \"spiUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n \"spiUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [\"&CPC_CPC_STAT_BUSY\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_ME1_DC0_SPI_BUSY\"] }, \"&CPC_CPC_STAT_BUSY\"]} ,\n null\n ]\n }},\n\n \"l2CacheBusy_avg\":{\"$avg\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_min\":{\"$min\": \"&CPC_CPC_TCIU_BUSY\"},\n \"l2CacheBusy_max\":{\"$max\": \"&CPC_CPC_TCIU_BUSY\"},\n\n \"l2CacheUtil_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n 
\"l2CacheUtil_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n \"l2CacheUtil_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_TCIU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_TCIU_BUSY\", \"&CPC_CPC_TCIU_IDLE\"] }]} ,\n null\n ]\n }},\n\n \"utcL1Stall_avg\":{\"$avg\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_min\":{\"$min\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n \"utcL1Stall_max\":{\"$max\": \"&CPC_UTCL1_STALL_ON_TRANSLATION\"},\n\n \"utcL2Busy_avg\":{\"$avg\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_min\":{\"$min\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n \"utcL2Busy_max\":{\"$max\": \"&CPC_CPC_UTCL2IU_BUSY\"},\n\n \"utcL2Util_avg\": {\"$avg\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_min\": {\"$min\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }},\n \"utcL2Util_max\": {\"$max\":{\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }, 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&CPC_CPC_UTCL2IU_BUSY\"] }, { \"$add\": [\"&CPC_CPC_UTCL2IU_BUSY\", \"&CPC_CPC_UTCL2IU_IDLE\"] }]} ,\n null\n ]\n }}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy Cycles\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": 
\"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Busy Cycles\",\n \"Avg\": \"&cpcBusy_avg\",\n \"Min\": \"&cpcBusy_min\",\n \"Max\": \"&cpcBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Util\",\n \"Avg\": \"&cpcUtil_avg\",\n \"Min\": \"&cpcUtil_min\",\n \"Max\": \"&cpcUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Stall Cycles\",\n \"Avg\": \"&cpcStallCycles_avg\",\n \"Min\": \"&cpcStallCycles_min\",\n \"Max\": \"&cpcStallCycles_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CPC Stall Rate\",\n \"Avg\": \"&cpcStallRate_avg\",\n \"Min\": \"&cpcStallRate_min\",\n \"Max\": \"&cpcStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"CPC Packet Decoding\",\n \"Avg\": \"&cpcPacketDecoding_avg\",\n \"Min\": \"&cpcPacketDecoding_min\",\n \"Max\": \"&cpcPacketDecoding_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Busy Cycles\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Intf Util\",\n \"Avg\": \"&spiUtil_avg\",\n \"Min\": \"&spiUtil_min\",\n \"Max\": \"&spiUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"L2Cache Intf Util\",\n \"Avg\": \"&l2CacheUtil_avg\",\n \"Min\": \"&l2CacheUtil_min\",\n \"Max\": \"&l2CacheUtil_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"UTCL1 Stall Cycles\",\n \"Avg\": \"&utcL1Stall_avg\",\n \"Min\": \"&utcL1Stall_min\",\n \"Max\": \"&utcL1Stall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Busy Cycles\",\n \"Avg\": \"&utcL2Busy_avg\",\n \"Min\": \"&utcL2Busy_min\",\n \"Max\": \"&utcL2Busy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"UTCL2 Intf Util\",\n \"Avg\": \"&utcL2Util_avg\",\n \"Min\": \"&utcL2Util_min\",\n \"Max\": \"&utcL2Util_max\", \n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + 
"title": "Command Processor Compute", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Metric 1": "", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 150 + }, + "id": 102, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Shader Processor Input (SPI)", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 101 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max 1" + }, + "properties": [ + { + "id": "custom.width", + "value": 145 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg 
2" + }, + "properties": [ + { + "id": "custom.width", + "value": 97 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min 2" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 123 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 151 + }, + "id": 106, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n 
\"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n 
\"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"csBusy_avg\":{\"$avg\": \"&SPI_CSN_BUSY\"},\n \"csBusy_min\":{\"$min\": \"&SPI_CSN_BUSY\"},\n \"csBusy_max\":{\"$max\": \"&SPI_CSN_BUSY\"},\n\n \"gpuBusy_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"gpuBusy_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"},\n\n \"spiBusy_avg\":{\"$avg\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_min\":{\"$min\": \"&GRBM_SPI_BUSY\"},\n \"spiBusy_max\":{\"$max\": \"&GRBM_SPI_BUSY\"},\n\n \"sqBusy_avg\":{\"$avg\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_min\":{\"$min\": \"&SQ_BUSY_CYCLES\"},\n \"sqBusy_max\":{\"$max\": \"&SQ_BUSY_CYCLES\"},\n\n \"dispatchedWorkgroups_avg\":{\"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_min\":{\"$min\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n \"dispatchedWorkgroups_max\":{\"$max\": \"&SPI_CSN_NUM_THREADGROUPS\"},\n\n \"dispatchedWavefronts_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"dispatchedWavefronts_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"waveAllocFail_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC\"},\n \"waveAllocFail_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC\"},\n\n \"waveAllocFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveAllocFailCS_max\":{\"$max\": 
\"&SPI_RA_REQ_NO_ALLOC_CSN\"}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"GPU Busy\",\n \"Avg\": \"&gpuBusy_avg\",\n \"Min\": \"&gpuBusy_min\",\n \"Max\": \"&gpuBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Busy\",\n \"Avg\": \"&csBusy_avg\",\n \"Min\": \"&csBusy_min\",\n \"Max\": \"&csBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SPI Busy\",\n \"Avg\": \"&spiBusy_avg\",\n \"Min\": \"&spiBusy_min\",\n \"Max\": \"&spiBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"SQ Busy\",\n \"Avg\": \"&sqBusy_avg\",\n \"Min\": \"&sqBusy_min\",\n \"Max\": \"&sqBusy_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Dispatched Workgroups\",\n \"Avg\": \"&dispatchedWorkgroups_avg\",\n \"Min\": \"&dispatchedWorkgroups_min\",\n \"Max\": \"&dispatchedWorkgroups_max\", \n \"Unit\": \"Workgroups\"\n },\n {\n \"Metric\": \"Dispatched Wavefronts\",\n \"Avg\": \"&dispatchedWavefronts_avg\",\n \"Min\": \"&dispatchedWavefronts_min\",\n \"Max\": \"&dispatchedWavefronts_max\", \n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Wave Alloc Failed\",\n \"Avg\": \"&waveAllocFail_avg\",\n \"Min\": \"&waveAllocFail_min\",\n \"Max\": \"&waveAllocFail_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Wave Alloc Failed - CS\",\n \"Avg\": \"&waveAllocFailCS_avg\",\n \"Min\": \"&waveAllocFailCS_min\",\n \"Max\": \"&waveAllocFailCS_max\", \n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg 
(Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 285 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 242 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + } + ] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 151 + }, + "id": 104, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { 
+ "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n 
\"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, 
\"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": 
\"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Shader Processor Input)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"waveReqFailCS_avg\":{\"$avg\": 
\"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_min\":{\"$min\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n \"waveReqFailCS_max\":{\"$max\": \"&SPI_RA_REQ_NO_ALLOC_CSN\"},\n\n \"csStall_avg\":{\"$avg\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_min\":{\"$min\": \"&SPI_RA_RES_STALL_CSN\"},\n \"csStall_max\":{\"$max\": \"&SPI_RA_RES_STALL_CSN\"},\n\n \"csStallRate_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n \"csStallRate_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$multiply\": [100, \"&SPI_RA_RES_STALL_CSN\"]}, \"&GRBM_SPI_BUSY\"]},\n null\n ]\n }},\n\n \"scratchStall_avg\":{\"$avg\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_min\":{\"$min\": \"&SPI_RA_TMP_STALL_CSN\"},\n \"scratchStall_max\":{\"$max\": \"&SPI_RA_TMP_STALL_CSN\"},\n\n \"simdWaveslots_avg\":{\"$avg\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_min\":{\"$min\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n \"simdWaveslots_max\":{\"$max\": \"&SPI_RA_WAVE_SIMD_FULL_CSN\"},\n\n \"simdVGPRs_avg\":{\"$avg\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_min\":{\"$min\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n \"simdVGPRs_max\":{\"$max\": \"&SPI_RA_VGPR_SIMD_FULL_CSN\"},\n\n \"simdSGPRs_avg\":{\"$avg\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_min\":{\"$min\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n \"simdSGPRs_max\":{\"$max\": \"&SPI_RA_SGPR_SIMD_FULL_CSN\"},\n\n \"cuLDS_avg\":{\"$avg\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_min\":{\"$min\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n \"cuLDS_max\":{\"$max\": \"&SPI_RA_LDS_CU_FULL_CSN\"},\n\n \"cuBarries_avg\":{\"$avg\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_min\":{\"$min\": 
\"&SPI_RA_BAR_CU_FULL_CSN\"},\n \"cuBarries_max\":{\"$max\": \"&SPI_RA_BAR_CU_FULL_CSN\"},\n\n \"bulkyResource_avg\":{\"$avg\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_min\":{\"$min\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n \"bulkyResource_max\":{\"$max\": \"&SPI_RA_BULKY_CU_FULL_CSN\"},\n\n \"threadgroupLimit_avg\":{\"$avg\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_min\":{\"$min\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n \"threadgroupLimit_max\":{\"$max\": \"&SPI_RA_TGLIM_CU_FULL_CSN\"},\n\n \"waveLimit_avg\":{\"$avg\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_min\":{\"$min\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n \"waveLimit_max\":{\"$max\": \"&SPI_RA_WVLIM_STALL_CSN\"},\n\n \"vgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n \"vgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n {\"$divide\": [{ \"$multiply\": [4, \"&SPI_VWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"]},\n null\n ]\n }},\n\n \"sgprsWrites_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \"sgprsWrites_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&SPI_CSN_WAVE\", 0]}, \n { \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&SPI_CSN_WAVE\"] },\n null\n ]\n }},\n \n \n \"sgprsWritesUti_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, 
\"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }},\n \"sgprsWritesUti_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&GRBM_SPI_BUSY\", 0]}, \n {\"$divide\": [{ \"$divide\": [{ \"$multiply\": [1, \"&SPI_SWC_CSC_WR\"] }, \"&GRBM_SPI_BUSY\"] }, 8] },\n null\n ]\n }}\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave request Failed (CS)\",\n \"Avg\": \"&waveReqFailCS_avg\",\n \"Min\": \"&waveReqFailCS_min\",\n \"Max\": \"&waveReqFailCS_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall\",\n \"Avg\": \"&csStall_avg\",\n \"Min\": \"&csStall_min\",\n \"Max\": \"&csStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"CS Stall Rate\",\n \"Avg\": \"&csStallRate_avg\",\n \"Min\": \"&csStallRate_min\",\n \"Max\": \"&csStallRate_max\", \n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Scratch Stall\",\n \"Avg\": \"&scratchStall_avg\",\n \"Min\": \"&scratchStall_min\",\n \"Max\": \"&scratchStall_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Insufficient SIMD Waveslots\",\n \"Avg\": \"&simdWaveslots_avg\",\n \"Min\": \"&simdWaveslots_min\",\n \"Max\": \"&simdWaveslots_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD VGPRs\",\n \"Avg\": \"&simdVGPRs_avg\",\n \"Min\": \"&simdVGPRs_min\",\n \"Max\": \"&simdVGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient SIMD SGPRs\",\n \"Avg\": \"&simdSGPRs_avg\",\n \"Min\": \"&simdSGPRs_min\",\n \"Max\": \"&simdSGPRs_max\", \n \"Unit\": \"#SIMD\"\n },\n {\n \"Metric\": \"Insufficient CU LDS\",\n \"Avg\": \"&cuLDS_avg\",\n \"Min\": \"&cuLDS_min\",\n \"Max\": \"&cuLDS_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Insufficient CU Barries\",\n \"Avg\": \"&cuBarries_avg\",\n \"Min\": \"&cuBarries_min\",\n \"Max\": \"&cuBarries_max\", \n \"Unit\": 
\"#CU\"\n },\n {\n \"Metric\": \"Insufficient Bulky Resource\",\n \"Avg\": \"&bulkyResource_avg\",\n \"Min\": \"&bulkyResource_min\",\n \"Max\": \"&bulkyResource_max\", \n \"Unit\": \"#CU\"\n },\n {\n \"Metric\": \"Reach CU Threadgroups Limit\",\n \"Avg\": \"&threadgroupLimit_avg\",\n \"Min\": \"&threadgroupLimit_min\",\n \"Max\": \"&threadgroupLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"Reach CU Wave Limit\",\n \"Avg\": \"&waveLimit_avg\",\n \"Min\": \"&waveLimit_min\",\n \"Max\": \"&waveLimit_max\", \n \"Unit\": \"Cycles\"\n },\n {\n \"Metric\": \"VGPR Writes\",\n \"Avg\": \"&vgprsWrites_avg\",\n \"Min\": \"&vgprsWrites_min\",\n \"Max\": \"&vgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"SGPR Writes\",\n \"Avg\": \"&sgprsWrites_avg\",\n \"Min\": \"&sgprsWrites_min\",\n \"Max\": \"&sgprsWrites_max\", \n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "SPI Resource Allocation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 166 + }, + "id": 185, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Wavefront", + "type": 
"row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 142 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 196 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 174 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.width", + "value": 168 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min" + }, + "properties": [ + { + "id": "custom.width", + "value": 272 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 225 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 167 + }, 
+ "id": 10, + "interval": "12h", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&Grid_Size\"},\n \"gridSize_min\":{\"$min\": \"&Grid_Size\"},\n \"gridSize_max\":{\"$max\": \"&Grid_Size\"},\n\n \"workSize_avg\":{\"$avg\": \"&Workgroup_Size\"},\n \"workSize_min\":{\"$min\": \"&Workgroup_Size\"},\n \"workSize_max\":{\"$max\": \"&Workgroup_Size\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&SGPR\"},\n \"sgprs_min\":{\"$min\": \"&SGPR\"},\n \"sgprs_max\":{\"$max\": \"&SGPR\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&LDS_Per_Workgroup\"},\n \"ldsAlloc_min\":{\"$min\": \"&LDS_Per_Workgroup\"},\n \"ldsAlloc_max\":{\"$max\": \"&LDS_Per_Workgroup\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&Scratch_Per_Workitem\"},\n \"scratchAlloc_min\":{\"$min\": \"&Scratch_Per_Workitem\"},\n \"scratchAlloc_max\":{\"$max\": 
\"&Scratch_Per_Workitem\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n 
\"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&Grid_Size\"},\n \"gridSize_min\":{\"$min\": \"&Grid_Size\"},\n \"gridSize_max\":{\"$max\": \"&Grid_Size\"},\n\n \"workSize_avg\":{\"$avg\": \"&Workgroup_Size\"},\n \"workSize_min\":{\"$min\": \"&Workgroup_Size\"},\n \"workSize_max\":{\"$max\": \"&Workgroup_Size\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\n \"$avg\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&Arch_VGPR\"\n ] \n }\n },\n \"vgprs_min\":{\n \"$min\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&Arch_VGPR\"\n ] \n }\n },\n \"vgprs_max\":{\n \"$max\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&Arch_VGPR\"\n ] \n }\n },\n\n \"agprs_avg\":{\n \"$avg\": \"&Accum_VGPR\"\n },\n \"agprs_min\":{\n \"$min\": \"&Accum_VGPR\"\n },\n \"agprs_max\":{\n \"$max\": \"&Accum_VGPR\"\n },\n\n \"sgprs_avg\":{\"$avg\": \"&SGPR\"},\n \"sgprs_min\":{\"$min\": \"&SGPR\"},\n \"sgprs_max\":{\"$max\": \"&SGPR\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&LDS_Per_Workgroup\"},\n \"ldsAlloc_min\":{\"$min\": \"&LDS_Per_Workgroup\"},\n \"ldsAlloc_max\":{\"$max\": \"&LDS_Per_Workgroup\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&Scratch_Per_Workitem\"},\n \"scratchAlloc_min\":{\"$min\": 
\"&Scratch_Per_Workitem\"},\n \"scratchAlloc_max\":{\"$max\": \"&Scratch_Per_Workitem\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"AGPRs\",\n \"Avg\": \"&agprs_avg\",\n \"Min\": \"&agprs_min\",\n \"Max\": \"&agprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Wavefront Launch Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + 
"id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true, + "Units 2": true, + "metric 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 122 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg" + }, + "properties": [ + { + "id": "custom.width", + "value": 223 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 169 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 167 + }, + "id": 34, + 
"options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n \"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, 
\"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": \"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n 
{\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"instrWavefront_avg\":{\"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_min\":{\"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n \"instrWavefront_max\":{\"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_WAVES\" ] }},\n\n \"waveCycles_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n \"waveCycles_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ]}},\n\n 
\"depWaitingCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"depWaitingCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"issueWaitCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"issueWaitCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_WAIT_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"activeCycles_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n \"activeCycles_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [4, \"&SQ_ACTIVE_INST_ANY\"] }, \"&SQ_WAVES\" ]}},\n\n \"kernelTime_avg\":{\"$avg\": {\"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\" ]}},\n \"kernelTime_min\":{\"$min\": {\"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\" ]}},\n \"kernelTime_max\":{\"$max\": {\"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\" ]}},\n\n \"kernelCycle_avg\":{\"$avg\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_min\":{\"$min\": \"&GRBM_GUI_ACTIVE\"},\n \"kernelCycle_max\":{\"$max\": \"&GRBM_GUI_ACTIVE\"}\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Kernel Time (Nanosec)\",\n \"Avg\": \"&kernelTime_avg\",\n \"Min\": \"&kernelTime_min\",\n \"Max\": \"&kernelTime_max\",\n \"Unit\":\"ns\"\n },\n {\n \"Metric\": \"Kernel Time (Cycles)\",\n \"Avg\": \"&kernelCycle_avg\",\n \"Min\": \"&kernelCycle_min\",\n \"Max\": \"&kernelCycle_max\",\n \"Unit\":\"Cycle\"\n },\n\n {\n \"Metric\": \"Instr/wavefront\",\n \"Avg\": \"&instrWavefront_avg\",\n \"Min\": 
\"&instrWavefront_min\",\n \"Max\": \"&instrWavefront_max\",\n \"Unit\": \"Instr/wavefront\"\n },\n {\n \"Metric\": \"Wave Cycles\",\n \"Avg\": \"&waveCycles_avg\",\n \"Min\": \"&waveCycles_min\",\n \"Max\": \"&waveCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Dependency Wait Cycles\",\n \"Avg\": \"&depWaitingCycles_avg\",\n \"Min\": \"&depWaitingCycles_min\",\n \"Max\": \"&depWaitingCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Issue Wait Cycles\",\n \"Avg\": \"&issueWaitCycles_avg\",\n \"Min\": \"&issueWaitCycles_min\",\n \"Max\": \"&issueWaitCycles_max\",\n \"Unit\": \"Cycles/wave\"\n },\n {\n \"Metric\": \"Active Cycles\",\n \"Avg\": \"&activeCycles_avg\",\n \"Min\": \"&activeCycles_min\",\n \"Max\": \"&activeCycles_max\",\n \"Unit\": \"Cycles/wave\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_LEVEL_WAVES\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"occAvg\": {\"$avg\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMin\": {\"$min\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } },\n \"occMax\": {\"$max\": { \"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\" ] } }\n }},\n {\"$project\": {\n \"_id\":0,\n \"Metric\": \"Wavefront Occupancy\",\n \"Avg\": \"&occAvg\",\n \"Min\":\"&occMin\",\n \"Max\":\"&occMax\",\n \"Unit\":\"Wavefronts\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "Wavefront Runtime Stats", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, 
+ "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg": "", + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit 1": "", + "Unit 2": "" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 178 + }, + "id": 209, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Instruction Mix", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 179 + }, + "id": 12, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { 
\"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_max\": {\n \"$max\": 
{ \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"avg\": \"&valu_avg\",\n \"min\": \"&valu_min\",\n \"max\": \"&valu_max\"\n },\n {\n \"metric\": \"VMEM\",\n \"avg\": \"&vmem_avg\",\n \"min\": \"&vmem_min\",\n \"max\": \"&vmem_max\"\n },\n {\n \"metric\": \"LDS\",\n \"avg\": \"&lds_avg\",\n \"min\": \"&lds_min\",\n \"max\": \"&lds_max\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"avg\": \"&mfma_avg\",\n \"min\": \"&mfma_min\",\n \"max\": \"&mfma_max\"\n },\n {\n \"metric\": \"SALU\",\n \"avg\": \"&salu_avg\",\n \"min\": \"&salu_min\",\n \"max\": \"&salu_max\"\n },\n {\n \"metric\": \"SMEM\",\n \"avg\": \"&smem_avg\",\n \"min\": \"&smem_min\",\n \"max\": \"&smem_max\"\n },\n {\n \"metric\": \"Branch\",\n \"avg\": \"&branch_avg\",\n \"min\": \"&branch_min\",\n \"max\": \"&branch_max\"\n },\n {\n \"metric\": \"GDS\",\n \"avg\": \"&gds_avg\",\n \"min\": \"&gds_min\",\n \"max\": \"&gds_max\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": 
"$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_avg\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_avg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_min\": {\n \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_LDS\", 
\"&denom\" ] }\n },\n \"salu_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_min\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n },\n \"valu_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem_max\": {\n \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds_max\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector (Baseline)\",\n \"avg\": \"&valu_avg\",\n \"min\": \"&valu_min\",\n \"max\": \"&valu_max\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"avg\": \"&vmem_avg\",\n \"min\": \"&vmem_min\",\n \"max\": \"&vmem_max\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"avg\": \"&lds_avg\",\n \"min\": \"&lds_min\",\n \"max\": \"&lds_max\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"avg\": \"&mfma_avg\",\n \"min\": \"&mfma_min\",\n \"max\": \"&mfma_max\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"avg\": \"&salu_avg\",\n \"min\": \"&salu_min\",\n \"max\": \"&salu_max\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"avg\": \"&smem_avg\",\n \"min\": \"&smem_min\",\n \"max\": \"&smem_max\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"avg\": 
\"&branch_avg\",\n \"min\": \"&branch_min\",\n \"max\": \"&branch_max\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"avg\": \"&gds_avg\",\n \"min\": \"&gds_min\",\n \"max\": \"&gds_max\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Instruction Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "max": true, + "min": true + }, + "indexByName": {}, + "renameByName": { + "avg": "" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 24, + "w": 12, + "x": 12, + "y": 179 + }, + "id": 24, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "limit": 100, + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": 
[ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": 
\"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n 
\"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA (Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n 
\"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL (Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VALU Arithmetic Instr Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 196 + }, + "id": 275, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n 
\"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": 
{\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": 
\"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VMEM Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "VMEM Instr", + "type 1": "VMEM Instr" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 203 + }, + "id": 16, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] 
},\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n 
{\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "MFMA Arithmetic Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "MFMA Instr", + "type 1": "MFMA Instr" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 213 + }, + "id": 8, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Compute Unit - Compute Pipeline", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 214 + }, + "id": 211, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": 
[\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 256 ]}, 1000] 
}]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $cu_per_gpu, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"valu_flops_val\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n ]}\n ,{ \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }]\n }},\n\n \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { 
\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] } },\n\n \"instr_val\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] }] }\n }\n\n }}, \n {\"$set\": {\n \"array\": [\n {\n \"valu_flops_pop\": {\"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 64, 2 ]}, 1000] }] },\n \"mfma_flops_bf16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 512 ]}, 1000] }]},\n \"mfma_flops_f16_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 1024 ]}, 1000] }]},\n \"mfma_flops_f32_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 256 ]}, 1000] }]},\n \"mfma_flops_f64_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 256 ]}, 1000] }]},\n \"mfma_flops_i8_pop\": { \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk2, $cu_per_gpu2, 1024 ]}, 1000] }]}\n }\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Compute Pipeline", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "mfma_flops_bf16_pop 1": 4, + 
"mfma_flops_bf16_pop 2": 5, + "mfma_flops_f16_pop 1": 6, + "mfma_flops_f16_pop 2": 7, + "mfma_flops_f32_pop 1": 8, + "mfma_flops_f32_pop 2": 9, + "mfma_flops_f64_pop 1": 10, + "mfma_flops_f64_pop 2": 11, + "mfma_flops_i8_pop 1": 12, + "mfma_flops_i8_pop 2": 13, + "valu_flops_pop 1": 0, + "valu_flops_pop 2": 1, + "valu_iops_pop 1": 2, + "valu_iops_pop 2": 3 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "flops_pop": "FLOPs", + "flops_pop 1": "FLOPs (Current)", + "flops_pop 2": "FLOPs (Baseline)", + "iops_pop": "IOPs", + "iops_pop 1": "IOPs (Current)", + "iops_pop 2": "IOPs (Baseline)", + "mfma_flops_bf16_pop": "MFMA- BF16 (FLOPs)", + "mfma_flops_bf16_pop 1": "MFMA-BF16 (Cur)", + "mfma_flops_bf16_pop 2": "MFMA-BF16 (Baseline)", + "mfma_flops_f16_pop": "MFMA-F16 (FLOPs)", + "mfma_flops_f16_pop 1": "MFMA-F16 (Cur)", + "mfma_flops_f16_pop 2": "MFMA-F16 (Baseline)", + "mfma_flops_f32_pop": "MFMA-F32 (FLOPs)", + "mfma_flops_f32_pop 1": "MFMA-F32 (Cur)", + "mfma_flops_f32_pop 2": "MFMA-F32 (Baseline)", + "mfma_flops_f64_pop": "MFMA-F64 (FLOPs)", + "mfma_flops_f64_pop 1": "MFMA-F64 (Cur)", + "mfma_flops_f64_pop 2": "MFMA-F64 (Baseline)", + "mfma_flops_i8_pop": "MFMA-i8 (IOPs)", + "mfma_flops_i8_pop 1": "MFMA-I8 (Cur)", + "mfma_flops_i8_pop 2": "MFMA-I8 (Baseline)", + "valu_flops_pop": "VALU (FLOPs)", + "valu_flops_pop 
1": "VALU FLOPs (Cur)", + "valu_flops_pop 2": "VALU FLOPs (Baseline)", + "valu_iops_pop": "VALU (IOPs)", + "valu_iops_pop 1": "VALU IOPs (Cur)", + "valu_iops_pop 2": "VALU IOPs (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 214 + }, + "id": 257, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", 
\"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu ] }\n },\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $cu_per_gpu, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $cu_per_gpu, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $cu_per_gpu, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ 
\"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n }\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg\": \"&avg_ipcAvg\",\n \"Min\": \"&min_ipcAvg\",\n \"Max\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg\": \"&avg_ipcIssue\",\n \"Min\": \"&min_ipcIssue\",\n \"Max\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg\": \"&avg_saluUtil\",\n \"Min\": 
\"&min_saluUtil\",\n \"Max\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg\": \"&avg_valuUtil\",\n \"Min\": \"&min_valuUtil\",\n \"Max\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg\": \"&avg_unpredthreads_val\",\n \"Min\": \"&min_unpredthreads_val\",\n \"Max\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg\": \"&avg_mfmaUtil\",\n \"Min\": \"&min_mfmaUtil\",\n \"Max\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg\": \"&avg_mfmaInstrCycles\",\n \"Min\": \"&min_mfmaInstrCycles\",\n \"Max\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ipcAvg\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \"min_ipcAvg\": {\n \"$min\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] } \n },\n \"max_ipcAvg\": {\n \"$max\": { \"$divide\": [ \"&SQ_INSTS\", \"&SQ_BUSY_CU_CYCLES\" ] }\n },\n \n \"avg_ipcIssue\": {\n \"$avg\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"min_ipcIssue\": {\n \"$min\": { 
\"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n \"max_ipcIssue\": {\n \"$max\": { \"$divide\": [ { \"$add\": [ \"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\",\"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n },\n\n \"avg_saluUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu2 ] }\n },\n \"min_saluUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu2 ] }\n },\n \"max_saluUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu2 ] }\n },\n\n \"avg_valuUtil\": {\n \"$avg\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu2 ] }\n },\n \"min_valuUtil\": {\n \"$min\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu2 ] }\n },\n \"max_valuUtil\": {\n \"$max\": { \"$divide\": [{ \"$divide\": [ { \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, \"&GRBM_GUI_ACTIVE\"] },$cu_per_gpu2 ] }\n },\n\n\n \"avg_mfmaUtil\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $cu_per_gpu2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"min_mfmaUtil\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $cu_per_gpu2, \"&GRBM_GUI_ACTIVE\"] }] }\n },\n \"max_mfmaUtil\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [4, $cu_per_gpu2, 
\"&GRBM_GUI_ACTIVE\"] }] }\n },\n\n \"avg_mfmaInstrCycles\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"min_mfmaInstrCycles\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n \"max_mfmaInstrCycles\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_MFMA\", 0]},\n {\"$divide\": [ \"&SQ_VALU_MFMA_BUSY_CYCLES\", \"&SQ_INSTS_MFMA\" ]},\n null\n ]\n }\n },\n\n \"avg_unpredthreads_val\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"min_unpredthreads_val\": {\n \"$min\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n \"max_unpredthreads_val\": {\n \"$max\": {\n \"$cond\": [\n {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n null\n ]\n }\n },\n\n \"avg_instrFetchBand\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \"min_instrFetchBand\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \"max_instrFetchBand\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [\"&SQ_IFETCH\", 32] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"IPC (Avg)\",\n \"Avg 2\": \"&avg_ipcAvg\",\n \"Min 2\": \"&min_ipcAvg\",\n \"Max 2\": \"&max_ipcAvg\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"IPC (Issue)\",\n \"Avg 2\": \"&avg_ipcIssue\",\n \"Min 2\": \"&min_ipcIssue\",\n 
\"Max 2\": \"&max_ipcIssue\",\n \"Unit\": \"Instr/cycle\"\n },\n {\n \"Metric\": \"SALU Util\",\n \"Avg 2\": \"&avg_saluUtil\",\n \"Min 2\": \"&min_saluUtil\",\n \"Max 2\": \"&max_saluUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Util\",\n \"Avg 2\": \"&avg_valuUtil\",\n \"Min 2\": \"&min_valuUtil\",\n \"Max 2\": \"&max_valuUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"VALU Active Threads\",\n \"Avg 2\": \"&avg_unpredthreads_val\",\n \"Min 2\": \"&min_unpredthreads_val\",\n \"Max 2\": \"&max_unpredthreads_val\",\n \"Unit\": \"Threads\"\n },\n {\n \"Metric\": \"MFMA Util\",\n \"Avg 2\": \"&avg_mfmaUtil\",\n \"Min 2\": \"&min_mfmaUtil\",\n \"Max 2\": \"&max_mfmaUtil\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"MFMA Instr Cycles\",\n \"Avg 2\": \"&avg_mfmaInstrCycles\",\n \"Min 2\": \"&min_mfmaInstrCycles\",\n \"Max 2\": \"&max_mfmaInstrCycles\",\n \"Unit\": \"cycles/instr\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Pipeline Stats", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg 2": "Avg (Baseline)", + "Max 2": "Max (Baseline)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 
+ } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 117 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 118 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 225 + }, + "id": 96, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": 
[\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] 
}},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, 
\"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": \"&min_int8Ops\",\n \"Max\": 
\"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_intOps\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] 
}, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"min_intOps\":{\"$min\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n \"max_intOps\":{\"$max\": {\n \"$divide\": [{ \"$add\": [{ \"$multiply\": [{ \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] },64] }, { \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\",512] }] },\"&denom\"]\n }},\n\n\n \"avg_flops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_flops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { 
\"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n \"max_flops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F16\", 2]}] }] },\n { \"$add\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"] }, { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] },\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n\n\n \"avg_int8Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"min_int8Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n \"max_int8Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, \"&denom\"] }},\n\n \"avg_int32Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"min_int32Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n \"max_int32Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT32\", 64] }, \"&denom\"] }},\n\n \"avg_int64Ops\":{\"$avg\": {\"$divide\": [{ \"$multiply\": 
[\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"min_int64Ops\":{\"$min\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n \"max_int64Ops\":{\"$max\": {\"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_INT64\", 64] }, \"&denom\"] }},\n\n \"avg_f16Ops\":{\"$avg\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"min_f16Ops\":{\"$min\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n \"max_f16Ops\":{\"$max\": {\n \"$divide\": [ \n {\"$add\": [{ \"$multiply\": [64, \"&SQ_INSTS_VALU_ADD_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_MUL_F16\"]}, \n { \"$multiply\": [64, \"&SQ_INSTS_VALU_TRANS_F16\"]}, \n { \"$multiply\": [128, \"&SQ_INSTS_VALU_FMA_F16\"]}, \n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F16\"]}\n ]},\n \"&denom\"]\n }},\n\n\n\n \"avg_bf16Ops\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"min_bf16Ops\":{\"$min\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n \"max_bf16Ops\":{\"$max\": { \"$divide\": [{ \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_BF16\"] }, \"&denom\"] }},\n\n \"avg_f32Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { 
\"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"min_f32Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n \"max_f32Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F32\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F32\"] }\n ] }, \"&denom\"]\n }},\n\n \"avg_f64Ops\":{\"$avg\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"min_f64Ops\":{\"$min\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }},\n \"max_f64Ops\":{\"$max\": {\n \"$divide\": [{ \"$add\": [\n { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", {\"$multiply\": [\"&SQ_INSTS_VALU_FMA_F64\", 2]}] }] },\n { \"$multiply\": [512, \"&SQ_INSTS_VALU_MFMA_MOPS_F64\"] }\n ] }, \"&denom\"]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"FLOPs (Total)\",\n \"Avg\": \"&avg_flops\",\n \"Min\": \"&min_flops\",\n \"Max\": \"&max_flops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"INT8 OPs\",\n \"Avg\": \"&avg_int8Ops\",\n \"Min\": 
\"&min_int8Ops\",\n \"Max\": \"&max_int8Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F16 OPs\",\n \"Avg\": \"&avg_f16Ops\",\n \"Min\": \"&min_f16Ops\",\n \"Max\": \"&max_f16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"BF16 OPs\",\n \"Avg\": \"&avg_bf16Ops\",\n \"Min\": \"&min_bf16Ops\",\n \"Max\": \"&max_bf16Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n\n {\n \"Metric\": \"F32 OPs\",\n \"Avg\": \"&avg_f32Ops\",\n \"Min\": \"&min_f32Ops\",\n \"Max\": \"&max_f32Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n },\n {\n \"Metric\": \"F64 OPs\",\n \"Avg\": \"&avg_f64Ops\",\n \"Min\": \"&min_f64Ops\",\n \"Max\": \"&max_f64Ops\",\n \"Unit\": {\"$concat\": [\"OPs \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Arithmetic Operations", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 1, + "Avg 2": 2, + "Max 1": 5, + "Max 2": 6, + "Metric 1": 0, + "Metric 2": 7, + "Min 1": 3, + "Min 2": 4, + "Unit 1": 9, + "Unit 2": 8 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 225 + }, + "id": 255, + 
"options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM", + "target": "${Workload1}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg\": \"&avg_vmemLat\",\n \"Min\": \"&min_vmemLat\",\n \"Max\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM", + "target": "${Workload1}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": 
[\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg\":\"&avg_smemLat\",\n \"Min\":\"&min_smemLat\",\n \"Max\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL", + "target": "${Workload1}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg\":\"&avg_instrFetchLat\",\n \"Min\":\"&min_instrFetchLat\",\n \"Max\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS", + "target": "${Workload1}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": 
[${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg\":\"&avg_ldsLat\",\n \"Min\":\"&min_ldsLat\",\n \"Max\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_VMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_VMEM.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_vmemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"min_vmemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n },\n \"max_vmemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_VMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_VMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"VMEM Latency\",\n \"Avg 2\": 
\"&avg_vmemLat\",\n \"Min 2\": \"&min_vmemLat\",\n \"Max 2\": \"&max_vmemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_SMEM2", + "target": "${Workload2}.SQ_INST_LEVEL_SMEM.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_smemLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"min_smemLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n },\n \"max_smemLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_SMEM\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_SMEM\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"SMEM Latency\",\n \"Avg 2\":\"&avg_smemLat\",\n \"Min 2\":\"&min_smemLat\",\n \"Max 2\":\"&max_smemLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_IFETCH_LEVEL2", + "target": "${Workload2}.SQ_IFETCH_LEVEL.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_instrFetchLat\": {\n \"$avg\": { \n \"$cond\": [\n 
{\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"min_instrFetchLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n },\n \"max_instrFetchLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_IFETCH\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"Instr Fetch Latency\",\n \"Avg 2\":\"&avg_instrFetchLat\",\n \"Min 2\":\"&min_instrFetchLat\",\n \"Max 2\":\"&max_instrFetchLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "SQ_INST_LEVEL_LDS2", + "target": "${Workload2}.SQ_INST_LEVEL_LDS.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Compute Pipeline)\"}}\n }}, \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLat\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLat\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLat\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"Metric\": \"LDS Latency\",\n \"Avg 2\":\"&avg_ldsLat\",\n \"Min 2\":\"&min_ldsLat\",\n \"Max 2\":\"&max_ldsLat\",\n \"Unit\": \"Cycles\"\n }}\n]);", + "type": "table" + } + ], + "title": "Memory 
Latencies", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Avg": 1, + "Avg 2": 2, + "Max": 5, + "Max 2": 6, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Avg": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 233 + }, + "id": 98, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Local Data Share (LDS)", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 234 + }, + "id": 205, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": 
${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu\"}\n ]}, \n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]},\n \n {\"$multiply\": [$sclk, $cu_per_gpu, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$lds_banks_per_cu2\"}\n ]}, \n {\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $cu_per_gpu2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}\n ]}\n }\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: LDS", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Access Rate 1": 6, + "Access Rate 2": 7, + "Bandwith (Pct-of-Peak) 1": 0, + "Bandwith (Pct-of-Peak) 2": 1, + "Bank Conflict Rate 1": 2, + "Bank Conflict Rate 2": 3, + "Utilization 1": 4, + "Utilization 2": 5 + }, + "renameByName": { + "Access Rate 1": "Access Rate (Current)", + "Access Rate 2": "Access Rate (Baseline)", + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith 
(Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "Utilization 1": "Util (Current)", + "Utilization 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "min": -100000000000000000000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 138 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 141 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 234 + }, + "id": 100, + "options": { + "footer": { + 
"fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n 
\"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n 
null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$lds_banks_per_cu\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$lds_banks_per_cu\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$lds_banks_per_cu\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflicts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\":
\"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": 
${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , 
\"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$lds_banks_per_cu\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", 
\"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$lds_banks_per_cu\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$lds_banks_per_cu\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Dispatch_ID Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": 
\"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + } + ], + "title": "LDS Stats", + "transformations": [ + { + "id": "concatenate", + "options": { + "frameNameLabel": "frame", + "frameNameMode": "field" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + 
} + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 246 + }, + "id": 44, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Instruction Cache", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 247 + }, + "id": 48, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $sqc_per_gpu]}, {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\" ] } ] }\n 
}\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_ICACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $sqc_per_gpu2]}, {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]} ]}\n ]\n }\n },\n\n \"cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQC_ICACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Instruction Cache ", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW (Pct-of-Peak) 1": 4, + "BW (Pct-of-Peak) 2": 5, + "Cache Hit 1": 6, + "Cache Hit 2": 7, + "Stall 1": 2, + "Stall 2": 3, + "Util 1": 0, + "Util 2": 1 + }, + "renameByName": { + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW 
Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 247 + }, + "id": 259, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { 
\"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&avg_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&avg_hits\",\n \"Min\": \"&min_hits\",\n \"Max\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non 
Duplicated\",\n \"Mean\": \"&avg_misses\",\n \"Min\": \"&min_misses\",\n \"Max\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean\": \"&avg_misses_dup\",\n \"Min\": \"&min_misses_dup\",\n \"Max\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n \n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&avg_cacheHit\",\n \"Min\": \"&min_cacheHit\",\n \"Max\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"min_req\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"max_req\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n\n \"avg_hits\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"min_hits\": {\n \"$min\": { \"$divide\": 
[\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n \"max_hits\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&denom\"] }\n },\n\n \"avg_misses\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"min_misses\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n \"max_misses\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES\", \"&denom\"] }\n },\n\n \"avg_misses_dup\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"min_misses_dup\": {\n \"$min\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"max_misses_dup\": {\n \"$max\": { \"$divide\": [\"&SQC_ICACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"avg_cacheHit\": {\n \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"min_cacheHit\": {\n \"$min\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n },\n \"max_cacheHit\": {\n \"$max\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\", \"&SQC_ICACHE_MISSES_DUPLICATE\"] }] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&avg_req\",\n \"Min 2\": \"&min_req\",\n \"Max 2\": \"&max_req\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&avg_hits\",\n \"Min 2\": \"&min_hits\",\n \"Max 2\": \"&max_hits\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&avg_misses\",\n \"Min 2\" : \"&min_misses\",\n \"Max 2\": \"&max_misses\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Duplicated\",\n \"Mean 2\": \"&avg_misses_dup\",\n \"Min 2\": 
\"&min_misses_dup\",\n \"Max 2\": \"&max_misses_dup\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&avg_cacheHit\",\n \"Min 2\": \"&min_cacheHit\",\n \"Max 2\": \"&max_cacheHit\",\n \"Unit\": \"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Instruction Cache Accesses", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "L1I Metric": "", + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 254 + }, + "id": 203, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Scalar L1 Data Cache", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L1K-TC BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 255 + }, + "id": 54, + "options": { + "displayMode": "gradient", + 
"minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk, $sqc_per_gpu]}, {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "sY628IJnz" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n 
{\"$group\": {\n \"_id\": null,\n \"reqBW_pop\": {\n \"$avg\": {\n \"$divide\": [\n {\"$multiply\": [\"&SQC_DCACHE_REQ\", 100000]}, \n {\"$multiply\": [ {\"$multiply\": [$sclk2, $sqc_per_gpu2]}, {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]} ]}\n ]\n }\n },\n \"cacheHit\": {\n \"$avg\": { \n \"$cond\":[\n {\"$ne\": [{ \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0 ]},\n {\"$divide\": [{ \"$multiply\": [\"&SQC_DCACHE_HITS\", 100] }, { \"$add\": [ \"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } ]},\n null\n ]\n }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Bandwidth\": \"&reqBW_pop\",\n \"Cache Hit\": \"&cacheHit\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Scalar L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "BW Pct-of-Peak 1": 0, + "BW Pct-of-Peak 2": 1, + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "Stall 1": 6, + "Stall 2": 7, + "Util 1": 4, + "Util 2": 5 + }, + "renameByName": { + "BW Pct-of-Peak 1": "BW Pct-of-Peak (Current)", + "BW Pct-of-Peak 2": "BW Pct-of-Peak (Baseline)", + "Bandwidth 1": "Bandwidth (Current)", + "Bandwidth 2": "Bandwidth (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, 
+ { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 255 + }, + "id": 261, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n \"misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES\", \"&denom\"] }\n },\n\n 
\"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", 
\"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\": \"&req_min\",\n \"Max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": 
\"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\": \"&hits_min\",\n \"Max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean\": \"&misses_avg\",\n \"Min\": \"&misses_min\",\n \"Max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean\": \"&dup_misses_avg\",\n \"Min\": \"&dup_misses_min\",\n \"Max\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean\": \"&cacheHit_avg\",\n \"Min\": \"&cacheHit_min\",\n \"Max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean\": \"&read1d_avg\",\n \"Min\": \"&read1d_min\",\n \"Max\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean\": \"&read2d_avg\",\n \"Min\": \"&read2d_min\",\n \"Max\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean\": \"&read4d_avg\",\n \"Min\": \"&read4d_min\",\n \"Max\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean\": \"&read8d_avg\",\n \"Min\": \"&read8d_min\",\n \"Max\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean\": \"&read16d_avg\",\n \"Min\": \"&read16d_min\",\n \"Max\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n 
}},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "bhNVuvBnk" + }, + "hide": false, + "rawQuery": true, + "refId": "pmc_perf2", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }}, \n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"req_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n\n \"hits_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n \"hits_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&denom\"] }\n },\n\n \"dup_misses_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n \"dup_misses_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_MISSES_DUPLICATE\", \"&denom\"] }\n },\n\n\n \"cacheHit_avg\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_min\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n \"cacheHit_max\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] } , 0]},\n { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\", \"&SQC_DCACHE_MISSES_DUPLICATE\"] }] },\n \"\"\n ]\n }\n },\n\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [{ \"$add\": [\"&SQC_DCACHE_REQ_READ_1\", \"&SQC_DCACHE_REQ_READ_2\", \"&SQC_DCACHE_REQ_READ_4\", \"&SQC_DCACHE_REQ_READ_8\", \"&SQC_DCACHE_REQ_READ_16\"] }, \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_ATOMIC\", \"&denom\"] }\n },\n\n\n \"read1d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", 
\"&denom\"] }\n },\n \"read1d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n \"read1d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_1\", \"&denom\"] }\n },\n\n \"read2d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n \"read2d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_2\", \"&denom\"] }\n },\n\n \"read4d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n \"read4d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_4\", \"&denom\"] }\n },\n\n \"read8d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n \"read8d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_8\", \"&denom\"] }\n },\n\n \"read16d_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_min\": {\n \"$min\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n },\n \"read16d_max\": {\n \"$max\": { \"$divide\": [\"&SQC_DCACHE_REQ_READ_16\", \"&denom\"] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean 2\": \"&req_avg\",\n \"Min 2\": \"&req_min\",\n \"Max 2\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Hits\",\n \"Mean 2\": \"&hits_avg\",\n \"Min 2\": \"&hits_min\",\n \"Max 2\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses - Non Duplicated\",\n \"Mean 2\": \"&misses_avg\",\n \"Min 2\": \"&misses_min\",\n \"Max 2\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Misses- Duplicated\",\n \"Mean 2\": 
\"&dup_misses_avg\",\n \"Min 2\": \"&dup_misses_min\",\n \"Max 2\": \"&dup_misses_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Cache Hit\",\n \"Mean 2\": \"&cacheHit_avg\",\n \"Min 2\": \"&cacheHit_min\",\n \"Max 2\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"Metric\": \"Read Req (Total)\",\n \"Mean 2\": \"&readReq_avg\",\n \"Min 2\": \"&readReq_min\",\n \"Max 2\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req (Total)\",\n \"Mean 2\": \"&writeReq_avg\",\n \"Min 2\": \"&writeReq_min\",\n \"Max 2\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean 2\": \"&atomicReq_avg\",\n \"Min 2\": \"&atomicReq_min\",\n \"Max 2\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (1 DWord)\",\n \"Mean 2\": \"&read1d_avg\",\n \"Min 2\": \"&read1d_min\",\n \"Max 2\": \"&read1d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (2 DWord)\",\n \"Mean 2\": \"&read2d_avg\",\n \"Min 2\": \"&read2d_min\",\n \"Max 2\": \"&read2d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (4 DWord)\",\n \"Mean 2\": \"&read4d_avg\",\n \"Min 2\": \"&read4d_min\",\n \"Max 2\": \"&read4d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (8 DWord)\",\n \"Mean 2\": \"&read8d_avg\",\n \"Min 2\": \"&read8d_min\",\n \"Max 2\": \"&read8d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Read Req (16 DWord)\",\n \"Mean 2\": \"&read16d_avg\",\n \"Min 2\": \"&read16d_min\",\n \"Max 2\": \"&read16d_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache Accesses", + 
"transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Max": 5, + "Max 2": 6, + "Mean": 1, + "Mean 2": 2, + "Metric": 0, + "Min": 3, + "Min 2": 4, + "Unit": 7 + }, + "renameByName": { + "Max": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min": "Min (Current)", + "Min 2": "Min (Baseline)", + "Unit": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 134 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 262 + }, + "id": 52, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": 
[${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": 
\"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Scalar L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"readReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"readReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n\n \"writeReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n 
\"writeReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"writeReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n\n \"atomicReq_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"atomicReq_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n\n \"tc2l1k_stall_avg\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_min\": {\n \"$min\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n },\n \"tc2l1k_stall_max\": {\n \"$max\": { \"$divide\": [\"&SQC_TC_STALL\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read Req\",\n \"Mean\": \"&readReq_avg\",\n \"Min\": \"&readReq_min\",\n \"Max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Write Req\",\n \"Mean\": \"&writeReq_avg\",\n \"Min\": \"&writeReq_min\",\n \"Max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Atomic Req\",\n \"Mean\": \"&atomicReq_avg\",\n \"Min\": \"&atomicReq_min\",\n \"Max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Stall\",\n \"Mean\": \"&tc2l1k_stall_avg\",\n \"Min\": \"&tc2l1k_stall_min\",\n \"Max\": \"&tc2l1k_stall_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Scalar L1D Cache - L2 Interface", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 
2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 269 + }, + "id": 130, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Texture Addresser and Texture Data (TA/TD)", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 133 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 112 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 270 + }, + "id": 132, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": 
{\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n\n 
\"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", 
\"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", 
\"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Instr\",\n \"avg\": 
\"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cylces\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and 
Data)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"taBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}] }},\n \"taBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}] }},\n \"taBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TA_TA_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}] }},\n\n \"tc2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"tc2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"tc2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n\n \"tc2ta_dataStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"tc2ta_dataStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"tc2ta_dataStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_DATA_STALLED_BY_TC_CYCLES_sum\"] }, { \"$multiply\": 
[\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n\n \"td2ta_addrStall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"td2ta_addrStall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"td2ta_addrStall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TA_ADDR_STALLED_BY_TD_CYCLES_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n\n \"totalInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n \"totalInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_TOTAL_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n \"flatInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\"] }},\n\n \"flatReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatWriteInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatWriteInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"flatAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"flatAtomicInstr_max\":{\"$max\": 
{\"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferReadInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferReadInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferWriteInstr_avg\":{\"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferWriteInstr_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"bufferAtomicInstr_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n \"bufferAtomicInstr_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\"]}},\n\n \"buffTotal_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n \"buffTotal_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_TOTAL_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleRead_avg\":{\"$avg\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_min\":{\"$min\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleRead_max\":{\"$max\": {\"$divide\": [ \"&TA_BUFFER_COALESCED_READ_CYCLES_sum\", \"&denom\"] }},\n\n \"buffCoscaleWrite_avg\":{\"$avg\": { \"$divide\": [ 
\"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_min\":{\"$min\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }},\n \"buffCoscaleWrite_max\":{\"$max\": { \"$divide\": [ \"&TA_BUFFER_COALESCED_WRITE_CYCLES_sum\", \"&denom\"] }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TA Busy\",\n \"avg\": \"&taBusy_avg\",\n \"min\": \"&taBusy_min\",\n \"max\": \"&taBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Addr Stall\",\n \"avg\": \"&tc2ta_addrStall_avg\",\n \"min\": \"&tc2ta_addrStall_min\",\n \"max\": \"&tc2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TA Data Stall\",\n \"avg\": \"&tc2ta_dataStall_avg\",\n \"min\": \"&tc2ta_dataStall_min\",\n \"max\": \"&tc2ta_dataStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TD2TA Addr Stall\",\n \"avg\": \"&td2ta_addrStall_avg\",\n \"min\": \"&td2ta_addrStall_min\",\n \"max\": \"&td2ta_addrStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Total Instructions\",\n \"avg\": \"&totalInstr_avg\",\n \"min\": \"&totalInstr_min\",\n \"max\": \"&totalInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Instr\",\n \"avg\": \"&flatInstr_avg\",\n \"min\": \"&flatInstr_min\",\n \"max\": \"&flatInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Read Instr\",\n \"avg\": \"&flatReadInstr_avg\",\n \"min\": \"&flatReadInstr_min\",\n \"max\": \"&flatReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Write Instr\",\n \"avg\": \"&flatWriteInstr_avg\",\n \"min\": \"&flatWriteInstr_min\",\n \"max\": \"&flatWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Flat Atomic Instr\",\n \"avg\": \"&flatAtomicInstr_avg\",\n \"min\": \"&flatAtomicInstr_min\",\n \"max\": \"&flatAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": 
\"Buffer Instr\",\n \"avg\": \"&bufferInstr_avg\",\n \"min\": \"&bufferInstr_min\",\n \"max\": \"&bufferInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Read Instr\",\n \"avg\": \"&bufferReadInstr_avg\",\n \"min\": \"&bufferReadInstr_min\",\n \"max\": \"&bufferReadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Write Instr\",\n \"avg\": \"&bufferWriteInstr_avg\",\n \"min\": \"&bufferWriteInstr_min\",\n \"max\": \"&bufferWriteInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Atomic Instr\",\n \"avg\": \"&bufferAtomicInstr_avg\",\n \"min\": \"&bufferAtomicInstr_min\",\n \"max\": \"&bufferAtomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Total Cylces\",\n \"avg\": \"&buffTotal_avg\",\n \"min\": \"&buffTotal_min\",\n \"max\": \"&buffTotal_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Read\",\n \"avg\": \"&buffCoscaleRead_avg\",\n \"min\": \"&buffCoscaleRead_min\",\n \"max\": \"&buffCoscaleRead_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Buffer Coalesced Write\",\n \"avg\": \"&buffCoscaleWrite_avg\",\n \"min\": \"&buffCoscaleWrite_min\",\n \"max\": \"&buffCoscaleWrite_max\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "TA", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": 
"Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 124 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 135 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 108 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 136 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 12, + "y": 270 + }, + "id": 134, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { 
\"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": 
[\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n 
\"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Texture Addr and Data)\"}}\n }},\n {\"$addFields\": {\n \n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"tdBusy_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}] }},\n \"tdBusy_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { 
\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}] }},\n \"tdBusy_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TD_BUSY_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}] }},\n\n \"tc2tdStall_avg\":{\"$avg\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"tc2tdStall_min\":{\"$min\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"tc2tdStall_max\":{\"$max\": { \"$divide\": [ { \"$multiply\": [100, \"&TD_TC_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n\n \"spi2td_stall_avg\":{\"$avg\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"spi2td_stall_min\":{\"$min\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n \"spi2td_stall_max\":{\"$max\": {\"$divide\": [ { \"$multiply\": [100, \"&TD_SPI_STALL_sum\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $cu_per_gpu2]}]}},\n\n \"coscaleInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_min\":{\"$min\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n \"coscaleInstr_max\":{\"$max\": {\"$divide\": [\"&TD_COALESCABLE_WAVEFRONT_sum\", \"&denom\"] }},\n\n \"loadInstr_avg\":{\"$avg\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_min\":{\"$min\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, \"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n \"loadInstr_max\":{\"$max\": {\"$divide\": [{ \"$subtract\": [{ \"$subtract\": [\"&TD_LOAD_WAVEFRONT_sum\", \"&TD_STORE_WAVEFRONT_sum\"]}, 
\"&TD_ATOMIC_WAVEFRONT_sum\"] }, \"&denom\" ] }},\n\n \"storeInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_min\":{\"$min\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n \"storeInstr_max\":{\"$max\": {\"$divide\": [\"&TD_STORE_WAVEFRONT_sum\", \"&denom\" ] }},\n\n \"atomicInstr_avg\":{\"$avg\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_min\":{\"$min\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}},\n \"atomicInstr_max\":{\"$max\": {\"$divide\": [\"&TD_ATOMIC_WAVEFRONT_sum\", \"&denom\" ]}}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"TD Busy\",\n \"avg\": \"&tdBusy_avg\",\n \"min\": \"&tdBusy_min\",\n \"max\": \"&tdBusy_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"TC2TD Stall\",\n \"avg\": \"&tc2tdStall_avg\",\n \"min\": \"&tc2tdStall_min\",\n \"max\": \"&tc2tdStall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"SPI2TD Stall\",\n \"avg\": \"&spi2td_stall_avg\",\n \"min\": \"&spi2td_stall_min\",\n \"max\": \"&spi2td_stall_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Coalescable Instr\",\n \"avg\": \"&coscaleInstr_avg\",\n \"min\": \"&coscaleInstr_min\",\n \"max\": \"&coscaleInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Load Instr\",\n \"avg\": \"&loadInstr_avg\",\n \"min\": \"&loadInstr_min\",\n \"max\": \"&loadInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Store Instr\",\n \"avg\": \"&storeInstr_avg\",\n \"min\": \"&storeInstr_min\",\n \"max\": \"&storeInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Instr\",\n \"avg\": \"&atomicInstr_avg\",\n \"min\": \"&atomicInstr_min\",\n \"max\": \"&atomicInstr_max\",\n \"Unit\": {\"$concat\": [\"Instr \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": 
"table" + } + ], + "title": "TD", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 287 + }, + "id": 112, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Vector L1 Data Cache", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 288 + }, + "id": 165, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { 
\"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $cu_per_gpu]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": 
${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"bufferCoalescing_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n null\n ]\n }\n },\n \"cacheUtil_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TCP_GATE_EN2_sum\", 100]}, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }\n },\n \"cacheBW_pct\": {\n \"$avg\": { \"$divide\": [ {\"$multiply\":[64, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"]}, \n {\"$subtract\":[\"&End_Timestamp\", \"&Start_Timestamp\"]} \n ] \n }\n },\n \"cacheHit_pct\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]\n }] \n },\n null\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Buffer Coalescing\": \"&bufferCoalescing_pct\",\n \"Cache Util\": \"&cacheUtil_pct\",\n \"Cache BW\": { \"$divide\": [{ \"$multiply\": [100, \"&cacheBW_pct\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk2, 1000] }, 64] }, $cu_per_gpu2]}] },\n \"Cache Hit\": \"&cacheHit_pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: Vector L1D Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "128B Read Combining 1": 6, + "128B Read Combining 2": 7, + "Buffer Coalescing 1": 0, + "Buffer 
Coalescing 2": 1, + "Cache BW 1": 2, + "Cache BW 2": 3, + "Cache Hit 1": 4, + "Cache Hit 2": 5 + }, + "renameByName": { + "128B Read Combining 1": "128B Read Combining (Current)", + "128B Read Combining 2": "128B Read Combining(Baseline)", + "Buffer Coalescing 1": "Buf Coalescing (Current)", + "Buffer Coalescing 2": "Buf Coalescing (Baseline)", + "Cache BW 1": "Cache BW (Current)", + "Cache BW 2": "Cache BW (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Cache Util 1": "Cache Util (Current)", + "Cache Util 2": "Cache Util (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "color-background" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 52 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 199 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 144 + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 288 + }, + "id": 116, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": 
[100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n 
\"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"l2Pending_avg\": {\"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_min\": {\"$min\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"l2Pending_max\": {\"$max\": {\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_PENDING_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n\n \"tcr2tcp_Stall_avg\":{\"$avg\":{\n \"$cond\":[\n {\"$ne\": 
[\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"tcr2tcp_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \"readTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \"readTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_READ_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"]},\n null\n ]\n }},\n \n \"writeTagRam_Stall_avg\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_min\": {\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \"writeTagRam_Stall_max\": {\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }},\n \n \n \"atomicTagRam_Stall_avg\":{\"$avg\":{\n 
\"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_min\":{\"$min\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }}, \n \"atomicTagRam_Stall_max\":{\"$max\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n null\n ]\n }} \n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Stalled on L2 Data\",\n \"Mean\": \"&l2Pending_avg\",\n \"Min\": \"&l2Pending_min\",\n \"Max\": \"&l2Pending_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Stalled on L2 Req\",\n \"Mean\": \"&tcr2tcp_Stall_avg\",\n \"Min\":\"&tcr2tcp_Stall_min\",\n \"Max\":\"&tcr2tcp_Stall_max\",\n \"unit\":\"pct\"\n },\n\n {\n \"Metric\": \"Tag RAM Stall (Read)\",\n \"Mean\": \"&readTagRam_Stall_avg\",\n \"Min\": \"&readTagRam_Stall_min\",\n \"Max\": \"&readTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Write)\",\n \"Mean\": \"&writeTagRam_Stall_avg\",\n \"Min\": \"&writeTagRam_Stall_min\",\n \"Max\": \"&writeTagRam_Stall_max\",\n \"unit\":\"pct\"\n },\n {\n \"Metric\": \"Tag RAM Stall (Atomic)\",\n \"Mean\": \"&atomicTagRam_Stall_avg\",\n \"Min\": \"&atomicTagRam_Stall_min\",\n \"Max\": \"&atomicTagRam_Stall_max\",\n \"unit\":\"pct\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Stalls", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Units 2": true, + "unit 2": true + }, + "indexByName": { + "Max 1": 6, + "Max 2": 7, 
+ "Mean 1": 2, + "Mean 2": 3, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 4, + "Min 2": 5, + "unit 1": 9, + "unit 2": 8 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)", + "unit 1": "Unit" + } + } + } + ], + "type": "table" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 116 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 78 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 139 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 50 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 127 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 12, + "x": 0, + "y": 301 + }, + "id": 128, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": 
[ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n 
\"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { 
\"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n \"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": 
[\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] 
}]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\":
\"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\":
\"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n\n \"totalReq_avg\":{\"$avg\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_min\":{\"$min\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n \"totalReq_max\":{\"$max\": { \"$divide\": [ \"&TCP_TOTAL_ACCESSES_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", 
\"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"]}},\n\n \"cacheBW_avg\":{\"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }},\n \"cacheBW_min\":{\"$min\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }},\n \"cacheBW_max\":{\"$max\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }},\n \n \"cacheAccess_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n \"cacheAccess_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \"&denom\"]}},\n\n \"cacheHits_avg\":{\"$avg\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_min\":{\"$min\": {\"$divide\": [ \n { 
\"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \"cacheHits_max\":{\"$max\": {\"$divide\": [ \n { \"$subtract\": [\n \"&TCP_TOTAL_CACHE_ACCESSES_sum\", \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}\n ]}, \n \"&denom\"\n ]}\n },\n \n \"cacheHitRate_avg\":{\"$avg\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_min\":{\"$min\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n \"cacheHitRate_max\":{\"$max\": {\n \"$cond\": [{\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]}, \n {\"$subtract\": [100, \n { \"$divide\": [ \n { \"$multiply\": [100, \n { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }\n ] \n }, \n \"&TCP_TOTAL_CACHE_ACCESSES_sum\"\n ]}\n ]},\n null\n ]\n }},\n\n \"l2_l1_read_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n \"l2_l1_read_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_READ_REQ_sum\", \"&denom\"]}},\n\n 
\"l2_l1_write_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n \"l2_l1_write_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }},\n\n\n \"l2_l1_atomic_avg\":{\"$avg\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_min\":{\"$min\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n \"l2_l1_atomic_max\":{\"$max\": {\"$divide\": [ { \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }},\n\n \"l2_l1_bw_avg\":{\"$avg\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_min\":{\"$min\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n \"l2_l1_bw_max\":{\"$max\": {\"$divide\": [{\"$multiply\": [64, {\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] } ]}, \"&denom\" ]}},\n\n \"invalidate_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_min\":{\"$min\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n \"invalidate_max\":{\"$max\": {\"$divide\": [ \"&TCP_TOTAL_WRITEBACK_INVALIDATES_sum\", \"&denom\"] }},\n\n\n \"l1Latency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n 
\"l1Latency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n \"l1Latency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0]},\n {\"$divide\": [ \"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\" ]},\n null\n ] \n }},\n\n\n \"l2ReadLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n \"l2ReadLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }]},\n null\n ] \n }},\n\n \"l2WriteLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }},\n \"l2WriteLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": 
[{\"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"]}, 0]},\n {\"$divide\": [ \"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]},\n null\n ]\n }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Total Req\",\n \"avg\": \"&totalReq_avg\",\n \"min\": \"&totalReq_min\",\n \"max\": \"&totalReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache BW\",\n \"avg\": \"&cacheBW_avg\",\n \"min\": \"&cacheBW_min\",\n \"max\": \"&cacheBW_max\",\n \"Unit\": \"GB/s\"\n },\n {\n \"metric\": \"Cache Accesses\",\n \"avg\": \"&cacheAccess_avg\",\n \"min\": \"&cacheAccess_min\",\n \"max\": \"&cacheAccess_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hits\",\n \"avg\": \"&cacheHits_avg\",\n \"min\": \"&cacheHits_min\",\n \"max\": \"&cacheHits_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit Rate\",\n \"avg\": \"&cacheHitRate_avg\",\n \"min\": \"&cacheHitRate_min\",\n \"max\": \"&cacheHitRate_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Invalidate\",\n \"avg\": \"&invalidate_avg\",\n \"min\": \"&invalidate_min\",\n \"max\": \"&invalidate_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 BW\",\n \"avg\": \"&l2_l1_bw_avg\",\n \"min\": \"&l2_l1_bw_min\",\n \"max\": \"&l2_l1_bw_max\",\n \"Unit\":
{\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Read\",\n \"avg\": \"&l2_l1_read_avg\",\n \"min\": \"&l2_l1_read_min\",\n \"max\": \"&l2_l1_read_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Write\",\n \"avg\": \"&l2_l1_write_avg\",\n \"min\": \"&l2_l1_write_min\",\n \"max\": \"&l2_l1_write_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1-L2 Atomic\",\n \"avg\": \"&l2_l1_atomic_avg\",\n \"min\": \"&l2_l1_atomic_min\",\n \"max\": \"&l2_l1_atomic_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"L1 Access Latency\",\n \"avg\": \"&l1Latency_avg\",\n \"min\": \"&l1Latency_min\",\n \"max\": \"&l1Latency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Read Latency\",\n \"avg\": \"&l2ReadLatency_avg\",\n \"min\": \"&l2ReadLatency_min\",\n \"max\": \"&l2ReadLatency_max\",\n \"Unit\": \"Cycles\"\n },\n {\n \"metric\": \"L1-L2 Write Latency\",\n \"avg\": \"&l2WriteLatency_avg\",\n \"min\": \"&l2WriteLatency_min\",\n \"max\": \"&l2WriteLatency_max\",\n \"Unit\": \"Cycles\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": 
"Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Coherency", + "GroupCols": 2, + "GroupGap": 5, + "GroupLabelColor": "#FF9830", + "GroupLabelFontSize": "100%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Xfer", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + 
"ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FADE2A", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:172", + "Col": 2, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Mean", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 301 + }, + "id": 120, + "pluginVersion": "8.2.1", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "tlh8EwUnk" + }, + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n\n \"readNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_avg\": {\n \"$avg\": { \"$divide\": 
[\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \"readRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_READ_REQ_sum\", \"&denom\"] }\n },\n \n \"writeNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeNC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_NC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"writeRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_WRITE_REQ_sum\", \"&denom\"] }\n },\n \n \"atomicNC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicNC_max\": {\n \"$max\": { \"$divide\": 
[\"&TCP_TCC_NC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicUC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_UC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicCC_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_CC_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_avg\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_min\": {\n \"$min\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n },\n \"atomicRW_max\": {\n \"$max\": { \"$divide\": [\"&TCP_TCC_RW_ATOMIC_REQ_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&readNC_avg\",\n \"Min\": \"&readNC_min\",\n \"Max\": \"&readNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&readUC_avg\",\n \"Min\": \"&readUC_min\",\n \"Max\": \"&readUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&readCC_avg\",\n \"Min\": \"&readCC_min\",\n \"Max\": \"&readCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Read\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&readRW_avg\",\n \"Min\": \"&readRW_min\",\n \"Max\": \"&readRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&writeRW_avg\",\n \"Min\": \"&writeRW_min\",\n \"Max\": \"&writeRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"NC\",\n 
\"Avg\": \"&writeNC_avg\",\n \"Min\": \"&writeNC_min\",\n \"Max\": \"&writeNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&writeUC_avg\",\n \"Min\": \"&writeUC_min\",\n \"Max\": \"&writeUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Write\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&writeCC_avg\",\n \"Min\": \"&writeCC_min\",\n \"Max\": \"&writeCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"NC\",\n \"Avg\": \"&atomicNC_avg\",\n \"Min\": \"&atomicNC_min\",\n \"Max\": \"&atomicNC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"UC\",\n \"Avg\": \"&atomicUC_avg\",\n \"Min\": \"&atomicUC_min\",\n \"Max\": \"&atomicUC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"CC\",\n \"Avg\": \"&atomicCC_avg\",\n \"Min\": \"&atomicCC_min\",\n \"Max\": \"&atomicCC_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Xfer\": \"Atomic\",\n \"Coherency\": \"RW\",\n \"Avg\": \"&atomicRW_avg\",\n \"Min\": \"&atomicRW_min\",\n \"Max\": \"&atomicRW_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D - L2 Transactions Req $normUnit", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": 
"byName", + "options": "Units" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 309 + }, + "id": 124, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 
0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": 
\"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Vector L1D Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_REQUEST_sum\" ,\"&denom\"] }},\n\n\n \"hitRatio_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n \"hitRatio_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCP_UTCL1_REQUEST_sum\", 0]},\n {\"$divide\": [{ \"$multiply\": [100, \"&TCP_UTCL1_TRANSLATION_HIT_sum\"] },\"&TCP_UTCL1_REQUEST_sum\"]},\n null\n ]\n }},\n\n \"hits_avg\":{\"$avg\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" 
,\"&denom\"] }},\n \"hits_min\":{\"$min\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n \"hits_max\":{\"$max\": { \"$divide\": [\"&TCP_UTCL1_TRANSLATION_HIT_sum\" ,\"&denom\"] }},\n\n \"missesTrans_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n \"missesTrans_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_TRANSLATION_MISS_sum\" ,\"&denom\"] }},\n\n \"missesPermis_avg\":{\"$avg\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_min\":{\"$min\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }},\n \"missesPermis_max\":{\"$max\": {\"$divide\": [\"&TCP_UTCL1_PERMISSION_MISS_sum\" ,\"&denom\"] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&req_avg\",\n \"Min\":\"&req_min\",\n \"Max\":\"&req_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Hit Ratio\",\n \"Mean\": \"&hitRatio_avg\",\n \"Min\":\"&hitRatio_min\",\n \"Max\":\"&hitRatio_max\",\n \"Units\":\"pct\"\n },\n {\n \"Metric\": \"Hits\",\n \"Mean\": \"&hits_avg\",\n \"Min\":\"&hits_min\",\n \"Max\":\"&hits_max\",\n \"Units\":{ \"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Translation)\",\n \"Mean\": \"&missesTrans_avg\",\n \"Min\":\"&missesTrans_min\",\n \"Max\":\"&missesTrans_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n },\n {\n \"Metric\": \"Misses (Permission)\",\n \"Mean\": \"&missesPermis_avg\",\n \"Min\":\"&missesPermis_min\",\n \"Max\":\"&missesPermis_max\",\n \"Units\":{\"$concat\": [\"\", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Vector L1D Addr Translation", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + 
"excludeByName": { + "Metric 2": true, + "Units 2": true + }, + "indexByName": { + "Max 1": 7, + "Max 2": 8, + "Mean 1": 3, + "Mean 2": 4, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Units 1": 9, + "Units 2": 2 + }, + "renameByName": { + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Mean 1": "Avg (Current)", + "Mean 2": "Avg (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 319 + }, + "id": 56, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW - GB/s" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW " + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + }, + { + "id": "color" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2 Util" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 100 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "Cache Hit" + }, + "properties": [ + { + "id": "max", + "value": 100 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Wr BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "L2-EA Rd BW" + }, + "properties": [ + { + "id": "unit", + "value": "GBs" + }, + { + "id": "max", + "value": 1638 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 320 + }, + "id": 64, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$lds_banks_per_cu\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 
0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n\n{\"$group\": {\n \"_id\": null,\n \"L2Util\": {\n \"$avg\": { \n \"$divide\": [\n {\"$multiply\": [\"&TCC_BUSY_sum\", 100]},\n {\"$multiply\": [{\"$toInt\":\"$lds_banks_per_cu2\"}, \"&GRBM_GUI_ACTIVE\"] }\n ] \n } \n },\n\n \"cacheHit\": {\n \"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n 0\n ] \n }},\n\n \"l2eaRdBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": 
[\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n },\n \n \"l2eaWrBW\": {\n \"$avg\": { \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, { \"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\"] } ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"L2 Util\": \"&L2Util\",\n \"Cache Hit\": \"&cacheHit\",\n \"L2-EA Rd BW\": \"&l2eaRdBW\",\n \"L2-EA Wr BW\": \"&l2eaWrBW\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: L2 Cache", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Cache Hit 1": 2, + "Cache Hit 2": 3, + "L2 Util 1": 0, + "L2 Util 2": 1, + "L2-EA Rd BW 1": 4, + "L2-EA Rd BW 2": 5, + "L2-EA Wr BW 1": 6, + "L2-EA Wr BW 2": 7 + }, + "renameByName": { + "Cache Hit 1": "L2 Cache Hit (Current)", + "Cache Hit 2": "L2 Cache Hit (Baseline)", + "L2 Util 1": "L2 Util (Current)", + "L2 Util 2": "L2 Util (Baseline)", + "L2-EA Rd BW - GB/s 1": "L2-EA RD BW (Current)", + "L2-EA Rd BW - GB/s 2": "L2-EA RD BW (baseline)", + "L2-EA Rd BW 1": "L2-EA Rd BW (Current)", + "L2-EA Rd BW 2": "L2-EA Rd BW (Baseline)", + "L2-EA Wr BW - GB/s 1": "L2-EA WR BW (Current)", + "L2-EA Wr BW - GB/s 2": "L2-EA WR BW (Baseline)", + "L2-EA Wr BW 1": "L2-EA Wr BW (Current)", + "L2-EA Wr BW 2": "L2-EA Wr BW (Baseline)" + } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 106 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Metric" + }, + "properties": [ + { + "id": "custom.width", + "value": 148 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 320 + }, + "id": 62, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": 
[\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ 
{ \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n 
\"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ 
\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": 
\"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + }, + { + "datasource": { + "type": 
"amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"readStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"readStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"writeStall_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, 
{\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n \"writeStall_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_BUSY_sum\", 0]},\n { \"$divide\": [ \n {\"$multiply\": [100, {\"$add\":[\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\"]}]},\n \"&TCC_BUSY_sum\" \n ]},\n null\n ]\n }},\n\n \"readBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n \"readBW_max\":{\"$max\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] } ] }, \"&denom\" ]\n }},\n\n \"writeBW_avg\":{\"$avg\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_min\":{\"$min\": {\n \"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n \"writeBW_max\":{\"$max\": {\n 
\"$divide\": [ { \"$add\" : [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64]}, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] } ] }, \"&denom\" ]\n }},\n\n \"read32_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n \"read32_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_32B_sum\", \"&denom\" ]\n }},\n\n \"read32Uncached_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"read32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RD_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"read64_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n \"read64_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, \"&denom\" ]\n }},\n\n \"hbmRead_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmRead_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"write32_avg\":{\"$avg\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_min\":{\"$min\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n \"write32_max\":{\"$max\": {\n \"$divide\": [ { \"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, \"&denom\" ]\n }},\n\n \"write32Uncached_avg\": {\"$avg\":{\n \"$divide\": [ 
\"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n \"write32Uncached_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WR_UNCACHED_32B_sum\", \"&denom\" ]\n }},\n\n \"write64_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n \"write64_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_64B_sum\", \"&denom\" ]\n }},\n\n \"hbmWrite_avg\":{\"$avg\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_min\":{\"$min\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n \"hbmWrite_max\":{\"$max\": {\n \"$divide\": [ \"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\" ]\n }},\n\n \"readLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n \"readLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_RDREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\" ] },\n null\n ]\n }},\n\n \"writeLatency_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n \"writeLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_WRREQ_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\" ] },\n null\n ]\n }},\n\n \"atomicOpLatency_avg\":{\"$avg\": {\n 
\"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }},\n \"atomicOpLatency_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0]},\n { \"$divide\": [ \"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\" ] },\n null\n ]\n }}\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Read BW\",\n \"Avg\": \"&readBW_avg\",\n \"Min\":\"&readBW_min\",\n \"Max\":\"&readBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Write BW\",\n \"Avg\": \"&writeBW_avg\",\n \"Min\":\"&writeBW_min\",\n \"Max\":\"&writeBW_max\",\n \"Unit\":{\"$concat\": [\"Bytes \", $normUnit] }\n },\n {\n \"Metric\": \"Read (32B)\",\n \"Avg\": \"&read32_avg\",\n \"Min\":\"&read32_min\",\n \"Max\":\"&read32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (Uncached 32B)\",\n \"Avg\": \"&read32Uncached_avg\",\n \"Min\":\"&read32Uncached_min\",\n \"Max\":\"&read32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read (64B)\",\n \"Avg\": \"&read64_avg\",\n \"Min\":\"&read64_min\",\n \"Max\":\"&read64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"HBM Read\",\n \"Avg\": \"&hbmRead_avg\",\n \"Min\":\"&hbmRead_min\",\n \"Max\":\"&hbmRead_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (32B)\",\n \"Avg\": \"&write32_avg\",\n \"Min\":\"&write32_min\",\n \"Max\":\"&write32_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Write (Uncached 32B)\",\n \"Avg\": \"&write32Uncached_avg\",\n \"Min\":\"&write32Uncached_min\",\n \"Max\":\"&write32Uncached_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n 
},\n {\n \"Metric\": \"Write (64B)\",\n \"Avg\": \"&write64_avg\",\n \"Min\":\"&write64_min\",\n \"Max\":\"&write64_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n\n {\n \"Metric\": \"HBM Write\",\n \"Avg\": \"&hbmWrite_avg\",\n \"Min\":\"&hbmWrite_min\",\n \"Max\":\"&hbmWrite_max\",\n \"Unit\":{\"$concat\": [\"Req \", $normUnit] }\n },\n {\n \"Metric\": \"Read Latency\",\n \"Avg\": \"&readLatency_avg\",\n \"Min\":\"&readLatency_min\",\n \"Max\":\"&readLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Write Latency\",\n \"Avg\": \"&writeLatency_avg\",\n \"Min\":\"&writeLatency_min\",\n \"Max\":\"&writeLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Atomic Latency\",\n \"Avg\": \"&atomicOpLatency_avg\",\n \"Min\":\"&atomicOpLatency_min\",\n \"Max\":\"&atomicOpLatency_max\",\n \"Unit\":\"Cycles\"\n },\n {\n \"Metric\": \"Read Stall\",\n \"Avg\": \"&readStall_avg\",\n \"Min\":\"&readStall_min\",\n \"Max\":\"&readStall_max\",\n \"Unit\":\"pct\"\n },\n {\n \"Metric\": \"Write Stall\",\n \"Avg\": \"&writeStall_avg\",\n \"Min\":\"&writeStall_min\",\n \"Max\":\"&writeStall_max\",\n \"Unit\":\"pct\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Transactions", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric 2": true, + "Unit 2": true + }, + "indexByName": { + "Avg 1": 3, + "Avg 2": 4, + "Max 1": 7, + "Max 2": 8, + "Metric 1": 0, + "Metric 2": 1, + "Min 1": 5, + "Min 2": 6, + "Unit 1": 9, + "Unit 2": 2 + }, + "renameByName": { + "Avg 1": "Avg (Current)", + "Avg 2": "Avg (Baseline)", + "Max 1": "Max (Current)", + "Max 2": "Max (Baseline)", + "Min 1": "Min (Current)", + "Min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 178 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 121 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ + { + "id": "custom.width", + "value": 126 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + } + ] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 327 + }, + "id": 58, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": 
{\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n 
\"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n 
\"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": 
{\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": \"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": 
{\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": \"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(L2 Cache)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"req_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_min\":{\"$min\": { \"$divide\": [ 
\"&TCC_REQ_sum\", \"&denom\"] }},\n \"req_max\":{\"$max\": { \"$divide\": [ \"&TCC_REQ_sum\", \"&denom\"] }},\n\n \"streamingReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n \"streamingReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_STREAMING_REQ_sum\", \"&denom\" ] }},\n\n \"readReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n \"readReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_READ_sum\", \"&denom\" ] }},\n\n \"writeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n \"writeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITE_sum\", \"&denom\" ]}},\n\n \"atomicReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n \"atomicReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_ATOMIC_sum\", \"&denom\" ]}},\n\n \"probeReq_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_min\":{\"$min\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n \"probeReq_max\":{\"$max\": {\"$divide\": [ \"&TCC_PROBE_sum\", \"&denom\" ]}},\n\n \"hits_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_min\":{\"$min\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n \"hits_max\":{\"$max\": {\"$divide\": [ \"&TCC_HIT_sum\", \"&denom\"] }},\n\n \"misses_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_min\":{\"$min\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n \"misses_max\":{\"$max\": { \"$divide\": [ \"&TCC_MISS_sum\", \"&denom\"] }},\n\n \"cacheHit_avg\":{\"$avg\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { 
\"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_min\":{\"$min\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n \"cacheHit_max\":{\"$max\": {\n \"$cond\": [\n {\"$ne\": [{ \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] }, 0]},\n { \"$divide\": [ { \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [ \"&TCC_HIT_sum\", \"&TCC_MISS_sum\" ] } ] },\n null\n ] \n }},\n\n\n \"writeback_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_min\":{\"$min\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n \"writeback_max\":{\"$max\": {\"$divide\": [ \"&TCC_WRITEBACK_sum\", \"&denom\"] }},\n\n \"nc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_min\":{\"$min\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n \"nc_max\":{\"$max\": {\"$divide\": [ \"&TCC_NC_REQ_sum\", \"&denom\" ] }},\n\n \"uc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_min\":{\"$min\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n \"uc_max\":{\"$max\": {\"$divide\": [ \"&TCC_UC_REQ_sum\", \"&denom\" ] }},\n\n \"cc_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_min\":{\"$min\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n \"cc_max\":{\"$max\": {\"$divide\": [ \"&TCC_CC_REQ_sum\", \"&denom\" ] }},\n\n \"rw_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_min\":{\"$min\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n \"rw_max\":{\"$max\": {\"$divide\": [ \"&TCC_RW_REQ_sum\", \"&denom\" ] }},\n\n \"writebackNorm_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_min\":{\"$min\": 
{\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n \"writebackNorm_max\":{\"$max\": {\"$divide\": [ \"&TCC_NORMAL_WRITEBACK_sum\", \"&denom\" ]}},\n\n \"writebackTC_avg\":{\"$avg\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_min\":{\"$min\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n \"writebackTC_max\":{\"$max\": {\"$divide\": [ \"&TCC_ALL_TC_OP_WB_WRITEBACK_sum\", \"&denom\" ] }},\n\n \"evictNorm_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_min\":{\"$min\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n \"evictNorm_max\":{\"$max\": { \"$divide\": [ \"&TCC_NORMAL_EVICT_sum\", \"&denom\" ] }},\n\n \"evictTC_avg\":{\"$avg\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_min\":{\"$min\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n \"evictTC_max\":{\"$max\": { \"$divide\": [ \"&TCC_ALL_TC_OP_INV_EVICT_sum\", \"&denom\" ] }},\n\n \"readReq128_avg\":{\"$avg\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_min\":{\"$min\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }},\n \"readReq128_max\":{\"$max\": {\"$divide\": [ \"&TCP_TCR_REQ_XFER128B_COMBINING_sum\", \"&denom\" ] }}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Req\",\n \"avg\": \"&req_avg\",\n \"min\": \"&req_min\",\n \"max\": \"&req_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Streaming Req\",\n \"avg\": \"&streamingReq_avg\",\n \"min\": \"&streamingReq_min\",\n \"max\": \"&streamingReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Read Req\",\n \"avg\": \"&readReq_avg\",\n \"min\": \"&readReq_min\",\n \"max\": \"&readReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n \n {\n \"metric\": \"Write Req\",\n \"avg\": \"&writeReq_avg\",\n \"min\": 
\"&writeReq_min\",\n \"max\": \"&writeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Req\",\n \"avg\": \"&atomicReq_avg\",\n \"min\": \"&atomicReq_min\",\n \"max\": \"&atomicReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Probe Req\",\n \"avg\": \"&probeReq_avg\",\n \"min\": \"&probeReq_min\",\n \"max\": \"&probeReq_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Hits\",\n \"avg\": \"&hits_avg\",\n \"min\": \"&hits_min\",\n \"max\": \"&hits_max\",\n \"Unit\": {\"$concat\": [\"Hits \", $normUnit]}\n }, \n {\n \"metric\": \"Misses\",\n \"avg\": \"&misses_avg\",\n \"min\": \"&misses_min\",\n \"max\": \"&misses_max\",\n \"Unit\": {\"$concat\": [\"Misses \", $normUnit]}\n },\n {\n \"metric\": \"Cache Hit\",\n \"avg\": \"&cacheHit_avg\",\n \"min\": \"&cacheHit_min\",\n \"max\": \"&cacheHit_max\",\n \"Unit\": \"pct\"\n },\n {\n \"metric\": \"Writeback\",\n \"avg\": \"&writeback_avg\",\n \"min\": \"&writeback_min\",\n \"max\": \"&writeback_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"NC Req\",\n \"avg\": \"&nc_avg\",\n \"min\": \"&nc_min\",\n \"max\": \"&nc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"UC Req\",\n \"avg\": \"&uc_avg\",\n \"min\": \"&uc_min\",\n \"max\": \"&uc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"CC Req\",\n \"avg\": \"&cc_avg\",\n \"min\": \"&cc_min\",\n \"max\": \"&cc_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"RW Req\",\n \"avg\": \"&rw_avg\",\n \"min\": \"&rw_min\",\n \"max\": \"&rw_max\",\n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"metric\": \"Writeback (Normal)\",\n \"avg\": \"&writebackNorm_avg\",\n \"min\": \"&writebackNorm_min\",\n \"max\": \"&writebackNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Writeback (TC Req)\",\n \"avg\": 
\"&writebackTC_avg\",\n \"min\": \"&writebackTC_min\",\n \"max\": \"&writebackTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (Normal)\",\n \"avg\": \"&evictNorm_avg\",\n \"min\": \"&evictNorm_min\",\n \"max\": \"&evictNorm_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n },\n {\n \"metric\": \"Evict (TC Req)\",\n \"avg\": \"&evictTC_avg\",\n \"min\": \"&evictTC_min\",\n \"max\": \"&evictTC_max\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "L2 Cache Accesses", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 2, + "avg 1": 3, + "avg 2": 4, + "max 1": 7, + "max 2": 8, + "metric 1": 0, + "metric 2": 1, + "min 1": 5, + "min 2": 6 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 30, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "Transaction", + "GroupCols": 1, + "GroupGap": 5, + "GroupLabelColor": "#FADE2A", + "GroupLabelFontSize": "120%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + 
"HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": true, + "LabelColName": "Metric", + "LabelColor": "#ffffff", + "LabelFontSize": "80%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 0, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#FF9830", + "ValueDecimals": 0, + "ValueFontSize": "100%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:81", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Selected": true + } + ], + "datasource": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 336 + }, + "id": 60, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": 
"$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"ioStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_READ_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_READ_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"ioStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"ioStall_WRITE_max\": {\n 
\"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_IO_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"gmiStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"gmiStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_GMI_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"hbmStall_WRITE_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_min\": {\n \"$min\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n \"hbmStall_WRITE_max\": {\n \"$max\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum\", \"&denom\"] }\n },\n\n \"creditStarvation_avg\": {\n \"$avg\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_min\": {\n \"$min\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n },\n \"creditStarvation_max\": {\n \"$max\": { \"$divide\": [\"&TCC_TOO_MANY_EA_WRREQS_STALL_sum\", \"&denom\"] }\n } \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"I/O\",\n \"Avg\": \"&ioStall_READ_avg\",\n \"Min\": \"&ioStall_READ_min\",\n \"Max\": \"&ioStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_READ_avg\",\n \"Min\": \"&gmiStall_READ_min\",\n \"Max\": \"&gmiStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Read\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_READ_avg\",\n \"Min\": \"&hbmStall_READ_min\",\n \"Max\": \"&hbmStall_READ_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Remote Socket Stall\",\n \"Transaction\": \"Write\",\n 
\"Target\": \"I/O\",\n \"Avg\": \"&ioStall_WRITE_avg\",\n \"Min\": \"&ioStall_WRITE_min\",\n \"Max\": \"&ioStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Peer GCD Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"GMI\",\n \"Avg\": \"&gmiStall_WRITE_avg\",\n \"Min\": \"&gmiStall_WRITE_min\",\n \"Max\": \"&gmiStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"HBM Stall\",\n \"Transaction\": \"Write\",\n \"Target\": \"HBM\",\n \"Avg\": \"&hbmStall_WRITE_avg\",\n \"Min\": \"&hbmStall_WRITE_min\",\n \"Max\": \"&hbmStall_WRITE_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n },\n {\n \"Metric\": \"Credit Starvation\",\n \"Transaction\": \"Write\",\n \"Target\": \"Fabric\",\n \"Avg\": \"&creditStarvation_avg\",\n \"Min\": \"&creditStarvation_min\",\n \"Max\": \"&creditStarvation_max\", \n \"Unit\": {\"$concat\": [\"Req \", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - Fabric Interface Stalls (Cycles $normUnit)", + "transparent": true, + "type": "michaeldmoore-multistat-panel" + }, + { + "collapsed": false, + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 347 + }, + "id": 66, + "panels": [], + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "L2 Cache (per Channel)", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 348 + }, 
+ "id": 314, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": \"null\",\n \"mean_hit_rate\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": 
[\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, 
\"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { 
\"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_hit_rate\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", 
\"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": 
[\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_hit_rate\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] 
},\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { 
\"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", 
\"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_hit_rate\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": [\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { 
\"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n { \"$multiply\": [100, \"&TCC_HIT[0]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[1]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[2]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[3]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[4]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[5]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[6]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[7]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[8]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[9]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[10]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[11]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[12]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[13]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[14]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[15]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[16]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[17]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[18]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[19]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[20]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[21]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[22]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[23]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[24]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[25]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[26]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[27]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[28]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[29]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[30]\"] },\n { \"$multiply\": [100, \"&TCC_HIT[31]\"] }\n ]\n },\n {\n \"$add\":[\n { \"$add\": [\"&TCC_MISS[0]\", \"&TCC_HIT[0]\"] },\n { \"$add\": [\"&TCC_MISS[1]\", \"&TCC_HIT[1]\"] },\n { \"$add\": 
[\"&TCC_MISS[2]\", \"&TCC_HIT[2]\"] },\n { \"$add\": [\"&TCC_MISS[3]\", \"&TCC_HIT[3]\"] },\n { \"$add\": [\"&TCC_MISS[4]\", \"&TCC_HIT[4]\"] },\n { \"$add\": [\"&TCC_MISS[5]\", \"&TCC_HIT[5]\"] },\n { \"$add\": [\"&TCC_MISS[6]\", \"&TCC_HIT[6]\"] },\n { \"$add\": [\"&TCC_MISS[7]\", \"&TCC_HIT[7]\"] },\n { \"$add\": [\"&TCC_MISS[8]\", \"&TCC_HIT[8]\"] },\n { \"$add\": [\"&TCC_MISS[9]\", \"&TCC_HIT[9]\"] },\n { \"$add\": [\"&TCC_MISS[10]\", \"&TCC_HIT[10]\"] },\n { \"$add\": [\"&TCC_MISS[11]\", \"&TCC_HIT[11]\"] },\n { \"$add\": [\"&TCC_MISS[12]\", \"&TCC_HIT[12]\"] },\n { \"$add\": [\"&TCC_MISS[13]\", \"&TCC_HIT[13]\"] },\n { \"$add\": [\"&TCC_MISS[14]\", \"&TCC_HIT[14]\"] },\n { \"$add\": [\"&TCC_MISS[15]\", \"&TCC_HIT[15]\"] },\n { \"$add\": [\"&TCC_MISS[16]\", \"&TCC_HIT[16]\"] },\n { \"$add\": [\"&TCC_MISS[17]\", \"&TCC_HIT[17]\"] },\n { \"$add\": [\"&TCC_MISS[18]\", \"&TCC_HIT[18]\"] },\n { \"$add\": [\"&TCC_MISS[19]\", \"&TCC_HIT[19]\"] },\n { \"$add\": [\"&TCC_MISS[20]\", \"&TCC_HIT[20]\"] },\n { \"$add\": [\"&TCC_MISS[21]\", \"&TCC_HIT[21]\"] },\n { \"$add\": [\"&TCC_MISS[22]\", \"&TCC_HIT[22]\"] },\n { \"$add\": [\"&TCC_MISS[23]\", \"&TCC_HIT[23]\"] },\n { \"$add\": [\"&TCC_MISS[24]\", \"&TCC_HIT[24]\"] },\n { \"$add\": [\"&TCC_MISS[25]\", \"&TCC_HIT[25]\"] },\n { \"$add\": [\"&TCC_MISS[26]\", \"&TCC_HIT[26]\"] },\n { \"$add\": [\"&TCC_MISS[27]\", \"&TCC_HIT[27]\"] },\n { \"$add\": [\"&TCC_MISS[28]\", \"&TCC_HIT[28]\"] },\n { \"$add\": [\"&TCC_MISS[29]\", \"&TCC_HIT[29]\"] },\n { \"$add\": [\"&TCC_MISS[30]\", \"&TCC_HIT[30]\"] },\n { \"$add\": [\"&TCC_MISS[31]\", \"&TCC_HIT[31]\"] }\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": 
\"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": 
\"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": \"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_REQ[0]\"},\n {\"$toInt\": \"&TCC_REQ[1]\"},\n {\"$toInt\": \"&TCC_REQ[2]\"},\n {\"$toInt\": \"&TCC_REQ[3]\"},\n {\"$toInt\": \"&TCC_REQ[4]\"},\n {\"$toInt\": \"&TCC_REQ[5]\"},\n {\"$toInt\": \"&TCC_REQ[6]\"},\n {\"$toInt\": \"&TCC_REQ[7]\"},\n {\"$toInt\": \"&TCC_REQ[8]\"},\n {\"$toInt\": \"&TCC_REQ[9]\"},\n {\"$toInt\": \"&TCC_REQ[10]\"},\n {\"$toInt\": \"&TCC_REQ[11]\"},\n {\"$toInt\": \"&TCC_REQ[12]\"},\n {\"$toInt\": \"&TCC_REQ[13]\"},\n {\"$toInt\": \"&TCC_REQ[14]\"},\n {\"$toInt\": \"&TCC_REQ[15]\"},\n {\"$toInt\": \"&TCC_REQ[16]\"},\n {\"$toInt\": 
\"&TCC_REQ[17]\"},\n {\"$toInt\": \"&TCC_REQ[18]\"},\n {\"$toInt\": \"&TCC_REQ[19]\"},\n {\"$toInt\": \"&TCC_REQ[20]\"},\n {\"$toInt\": \"&TCC_REQ[21]\"},\n {\"$toInt\": \"&TCC_REQ[22]\"},\n {\"$toInt\": \"&TCC_REQ[23]\"},\n {\"$toInt\": \"&TCC_REQ[24]\"},\n {\"$toInt\": \"&TCC_REQ[25]\"},\n {\"$toInt\": \"&TCC_REQ[26]\"},\n {\"$toInt\": \"&TCC_REQ[27]\"},\n {\"$toInt\": \"&TCC_REQ[28]\"},\n {\"$toInt\": \"&TCC_REQ[29]\"},\n {\"$toInt\": \"&TCC_REQ[30]\"},\n {\"$toInt\": \"&TCC_REQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_read_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_read_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n 
{\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_read_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": 
\"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_read_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_READ[0]\"},\n {\"$toInt\": \"&TCC_READ[1]\"},\n {\"$toInt\": \"&TCC_READ[2]\"},\n {\"$toInt\": \"&TCC_READ[3]\"},\n {\"$toInt\": \"&TCC_READ[4]\"},\n {\"$toInt\": \"&TCC_READ[5]\"},\n {\"$toInt\": \"&TCC_READ[6]\"},\n {\"$toInt\": \"&TCC_READ[7]\"},\n {\"$toInt\": \"&TCC_READ[8]\"},\n {\"$toInt\": \"&TCC_READ[9]\"},\n {\"$toInt\": \"&TCC_READ[10]\"},\n {\"$toInt\": \"&TCC_READ[11]\"},\n {\"$toInt\": \"&TCC_READ[12]\"},\n {\"$toInt\": \"&TCC_READ[13]\"},\n {\"$toInt\": \"&TCC_READ[14]\"},\n {\"$toInt\": \"&TCC_READ[15]\"},\n {\"$toInt\": \"&TCC_READ[16]\"},\n {\"$toInt\": \"&TCC_READ[17]\"},\n {\"$toInt\": \"&TCC_READ[18]\"},\n {\"$toInt\": \"&TCC_READ[19]\"},\n {\"$toInt\": \"&TCC_READ[20]\"},\n {\"$toInt\": \"&TCC_READ[21]\"},\n {\"$toInt\": \"&TCC_READ[22]\"},\n {\"$toInt\": \"&TCC_READ[23]\"},\n {\"$toInt\": \"&TCC_READ[24]\"},\n {\"$toInt\": \"&TCC_READ[25]\"},\n {\"$toInt\": \"&TCC_READ[26]\"},\n {\"$toInt\": \"&TCC_READ[27]\"},\n {\"$toInt\": \"&TCC_READ[28]\"},\n {\"$toInt\": \"&TCC_READ[29]\"},\n {\"$toInt\": \"&TCC_READ[30]\"},\n {\"$toInt\": \"&TCC_READ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_write_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": 
\"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_write_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n 
{\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_write_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n {\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_write_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_WRITE[0]\"},\n {\"$toInt\": \"&TCC_WRITE[1]\"},\n {\"$toInt\": \"&TCC_WRITE[2]\"},\n {\"$toInt\": \"&TCC_WRITE[3]\"},\n {\"$toInt\": \"&TCC_WRITE[4]\"},\n {\"$toInt\": \"&TCC_WRITE[5]\"},\n {\"$toInt\": \"&TCC_WRITE[6]\"},\n {\"$toInt\": \"&TCC_WRITE[7]\"},\n {\"$toInt\": \"&TCC_WRITE[8]\"},\n {\"$toInt\": \"&TCC_WRITE[9]\"},\n {\"$toInt\": \"&TCC_WRITE[10]\"},\n {\"$toInt\": \"&TCC_WRITE[11]\"},\n {\"$toInt\": \"&TCC_WRITE[12]\"},\n 
{\"$toInt\": \"&TCC_WRITE[13]\"},\n {\"$toInt\": \"&TCC_WRITE[14]\"},\n {\"$toInt\": \"&TCC_WRITE[15]\"},\n {\"$toInt\": \"&TCC_WRITE[16]\"},\n {\"$toInt\": \"&TCC_WRITE[17]\"},\n {\"$toInt\": \"&TCC_WRITE[18]\"},\n {\"$toInt\": \"&TCC_WRITE[19]\"},\n {\"$toInt\": \"&TCC_WRITE[20]\"},\n {\"$toInt\": \"&TCC_WRITE[21]\"},\n {\"$toInt\": \"&TCC_WRITE[22]\"},\n {\"$toInt\": \"&TCC_WRITE[23]\"},\n {\"$toInt\": \"&TCC_WRITE[24]\"},\n {\"$toInt\": \"&TCC_WRITE[25]\"},\n {\"$toInt\": \"&TCC_WRITE[26]\"},\n {\"$toInt\": \"&TCC_WRITE[27]\"},\n {\"$toInt\": \"&TCC_WRITE[28]\"},\n {\"$toInt\": \"&TCC_WRITE[29]\"},\n {\"$toInt\": \"&TCC_WRITE[30]\"},\n {\"$toInt\": \"&TCC_WRITE[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_atomic_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n 
]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_atomic_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_atomic_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": 
\"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_atomic_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n 
\"&denom\"\n ] \n }\n },\n \"mean_eaRead_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_eaRead_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n 
{\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_eaRead_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_eaRead_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaWrite_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_eaWrite_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n 
{\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_eaWrite_req\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_eaWrite_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[3]\"},\n 
{\"$toInt\": \"&TCC_EA_WRREQ[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaAtomic_req\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": 
\"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_eaAtomic_req\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_eaAtomic_req\": {\n 
\"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_eaAtomic_req\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_ATOMIC[0]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[1]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[2]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[3]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[4]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[5]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[6]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[7]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[8]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[9]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[10]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[11]\"},\n {\"$toInt\": 
\"&TCC_EA_ATOMIC[12]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[13]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[14]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[15]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[16]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[17]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[18]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[19]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[20]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[21]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[22]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[23]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[24]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[25]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[26]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[27]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[28]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[29]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[30]\"},\n {\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_eaRead_lat\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n 
\"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_eaRead_lat\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n 
\"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n 
\"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_eaRead_lat\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n 
\"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_eaRead_lat\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n 
\"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_RDREQ_LEVEL[0]\",\n \"&TCC_EA_RDREQ_LEVEL[1]\",\n \"&TCC_EA_RDREQ_LEVEL[2]\",\n \"&TCC_EA_RDREQ_LEVEL[3]\",\n \"&TCC_EA_RDREQ_LEVEL[4]\",\n \"&TCC_EA_RDREQ_LEVEL[5]\",\n \"&TCC_EA_RDREQ_LEVEL[6]\",\n \"&TCC_EA_RDREQ_LEVEL[7]\",\n \"&TCC_EA_RDREQ_LEVEL[8]\",\n \"&TCC_EA_RDREQ_LEVEL[9]\",\n \"&TCC_EA_RDREQ_LEVEL[10]\",\n \"&TCC_EA_RDREQ_LEVEL[11]\",\n \"&TCC_EA_RDREQ_LEVEL[12]\",\n \"&TCC_EA_RDREQ_LEVEL[13]\",\n \"&TCC_EA_RDREQ_LEVEL[14]\",\n \"&TCC_EA_RDREQ_LEVEL[15]\",\n \"&TCC_EA_RDREQ_LEVEL[16]\",\n \"&TCC_EA_RDREQ_LEVEL[17]\",\n \"&TCC_EA_RDREQ_LEVEL[18]\",\n \"&TCC_EA_RDREQ_LEVEL[19]\",\n \"&TCC_EA_RDREQ_LEVEL[20]\",\n \"&TCC_EA_RDREQ_LEVEL[21]\",\n \"&TCC_EA_RDREQ_LEVEL[22]\",\n \"&TCC_EA_RDREQ_LEVEL[23]\",\n \"&TCC_EA_RDREQ_LEVEL[24]\",\n \"&TCC_EA_RDREQ_LEVEL[25]\",\n \"&TCC_EA_RDREQ_LEVEL[26]\",\n \"&TCC_EA_RDREQ_LEVEL[27]\",\n \"&TCC_EA_RDREQ_LEVEL[28]\",\n \"&TCC_EA_RDREQ_LEVEL[29]\",\n \"&TCC_EA_RDREQ_LEVEL[30]\",\n \"&TCC_EA_RDREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_RDREQ[0]\",\n \"&TCC_EA_RDREQ[1]\",\n \"&TCC_EA_RDREQ[2]\",\n \"&TCC_EA_RDREQ[3]\",\n \"&TCC_EA_RDREQ[4]\",\n \"&TCC_EA_RDREQ[5]\",\n \"&TCC_EA_RDREQ[6]\",\n \"&TCC_EA_RDREQ[7]\",\n \"&TCC_EA_RDREQ[8]\",\n \"&TCC_EA_RDREQ[9]\",\n \"&TCC_EA_RDREQ[10]\",\n \"&TCC_EA_RDREQ[11]\",\n \"&TCC_EA_RDREQ[12]\",\n \"&TCC_EA_RDREQ[13]\",\n \"&TCC_EA_RDREQ[14]\",\n \"&TCC_EA_RDREQ[15]\",\n \"&TCC_EA_RDREQ[16]\",\n \"&TCC_EA_RDREQ[17]\",\n \"&TCC_EA_RDREQ[18]\",\n \"&TCC_EA_RDREQ[19]\",\n \"&TCC_EA_RDREQ[20]\",\n \"&TCC_EA_RDREQ[21]\",\n \"&TCC_EA_RDREQ[22]\",\n \"&TCC_EA_RDREQ[23]\",\n \"&TCC_EA_RDREQ[24]\",\n \"&TCC_EA_RDREQ[25]\",\n \"&TCC_EA_RDREQ[26]\",\n \"&TCC_EA_RDREQ[27]\",\n \"&TCC_EA_RDREQ[28]\",\n \"&TCC_EA_RDREQ[29]\",\n \"&TCC_EA_RDREQ[30]\",\n \"&TCC_EA_RDREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_eaWrite_lat\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": 
[\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n 
\"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_eaWrite_lat\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n 
\"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_eaWrite_lat\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n 
\"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n \"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n 
\"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_eaWrite_lat\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_WRREQ_LEVEL[0]\",\n \"&TCC_EA_WRREQ_LEVEL[1]\",\n \"&TCC_EA_WRREQ_LEVEL[2]\",\n \"&TCC_EA_WRREQ_LEVEL[3]\",\n \"&TCC_EA_WRREQ_LEVEL[4]\",\n \"&TCC_EA_WRREQ_LEVEL[5]\",\n \"&TCC_EA_WRREQ_LEVEL[6]\",\n \"&TCC_EA_WRREQ_LEVEL[7]\",\n \"&TCC_EA_WRREQ_LEVEL[8]\",\n \"&TCC_EA_WRREQ_LEVEL[9]\",\n \"&TCC_EA_WRREQ_LEVEL[10]\",\n \"&TCC_EA_WRREQ_LEVEL[11]\",\n \"&TCC_EA_WRREQ_LEVEL[12]\",\n \"&TCC_EA_WRREQ_LEVEL[13]\",\n \"&TCC_EA_WRREQ_LEVEL[14]\",\n \"&TCC_EA_WRREQ_LEVEL[15]\",\n \"&TCC_EA_WRREQ_LEVEL[16]\",\n \"&TCC_EA_WRREQ_LEVEL[17]\",\n \"&TCC_EA_WRREQ_LEVEL[18]\",\n \"&TCC_EA_WRREQ_LEVEL[19]\",\n \"&TCC_EA_WRREQ_LEVEL[20]\",\n \"&TCC_EA_WRREQ_LEVEL[21]\",\n \"&TCC_EA_WRREQ_LEVEL[22]\",\n \"&TCC_EA_WRREQ_LEVEL[23]\",\n \"&TCC_EA_WRREQ_LEVEL[24]\",\n \"&TCC_EA_WRREQ_LEVEL[25]\",\n \"&TCC_EA_WRREQ_LEVEL[26]\",\n 
\"&TCC_EA_WRREQ_LEVEL[27]\",\n \"&TCC_EA_WRREQ_LEVEL[28]\",\n \"&TCC_EA_WRREQ_LEVEL[29]\",\n \"&TCC_EA_WRREQ_LEVEL[30]\",\n \"&TCC_EA_WRREQ_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_WRREQ[0]\",\n \"&TCC_EA_WRREQ[1]\",\n \"&TCC_EA_WRREQ[2]\",\n \"&TCC_EA_WRREQ[3]\",\n \"&TCC_EA_WRREQ[4]\",\n \"&TCC_EA_WRREQ[5]\",\n \"&TCC_EA_WRREQ[6]\",\n \"&TCC_EA_WRREQ[7]\",\n \"&TCC_EA_WRREQ[8]\",\n \"&TCC_EA_WRREQ[9]\",\n \"&TCC_EA_WRREQ[10]\",\n \"&TCC_EA_WRREQ[11]\",\n \"&TCC_EA_WRREQ[12]\",\n \"&TCC_EA_WRREQ[13]\",\n \"&TCC_EA_WRREQ[14]\",\n \"&TCC_EA_WRREQ[15]\",\n \"&TCC_EA_WRREQ[16]\",\n \"&TCC_EA_WRREQ[17]\",\n \"&TCC_EA_WRREQ[18]\",\n \"&TCC_EA_WRREQ[19]\",\n \"&TCC_EA_WRREQ[20]\",\n \"&TCC_EA_WRREQ[21]\",\n \"&TCC_EA_WRREQ[22]\",\n \"&TCC_EA_WRREQ[23]\",\n \"&TCC_EA_WRREQ[24]\",\n \"&TCC_EA_WRREQ[25]\",\n \"&TCC_EA_WRREQ[26]\",\n \"&TCC_EA_WRREQ[27]\",\n \"&TCC_EA_WRREQ[28]\",\n \"&TCC_EA_WRREQ[29]\",\n \"&TCC_EA_WRREQ[30]\",\n \"&TCC_EA_WRREQ[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_eaAtomic_lat\": {\n \"$avg\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n 
\"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"min_eaAtomic_lat\": {\n \"$min\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n 
\"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n \"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n 
\"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"max_eaAtomic_lat\": {\n \"$max\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n \"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n 
\"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"stdDev_eaAtomic_lat\": {\n \"$stdDevPop\":{\n \"$cond\": [\n {\"$ne\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n 
\"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n \"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n },\n 0\n ]},\n {\"$divide\": [\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC_LEVEL[0]\",\n \"&TCC_EA_ATOMIC_LEVEL[1]\",\n \"&TCC_EA_ATOMIC_LEVEL[2]\",\n \"&TCC_EA_ATOMIC_LEVEL[3]\",\n \"&TCC_EA_ATOMIC_LEVEL[4]\",\n \"&TCC_EA_ATOMIC_LEVEL[5]\",\n \"&TCC_EA_ATOMIC_LEVEL[6]\",\n \"&TCC_EA_ATOMIC_LEVEL[7]\",\n \"&TCC_EA_ATOMIC_LEVEL[8]\",\n \"&TCC_EA_ATOMIC_LEVEL[9]\",\n \"&TCC_EA_ATOMIC_LEVEL[10]\",\n \"&TCC_EA_ATOMIC_LEVEL[11]\",\n \"&TCC_EA_ATOMIC_LEVEL[12]\",\n \"&TCC_EA_ATOMIC_LEVEL[13]\",\n \"&TCC_EA_ATOMIC_LEVEL[14]\",\n \"&TCC_EA_ATOMIC_LEVEL[15]\",\n \"&TCC_EA_ATOMIC_LEVEL[16]\",\n \"&TCC_EA_ATOMIC_LEVEL[17]\",\n \"&TCC_EA_ATOMIC_LEVEL[18]\",\n \"&TCC_EA_ATOMIC_LEVEL[19]\",\n \"&TCC_EA_ATOMIC_LEVEL[20]\",\n \"&TCC_EA_ATOMIC_LEVEL[21]\",\n \"&TCC_EA_ATOMIC_LEVEL[22]\",\n \"&TCC_EA_ATOMIC_LEVEL[23]\",\n \"&TCC_EA_ATOMIC_LEVEL[24]\",\n \"&TCC_EA_ATOMIC_LEVEL[25]\",\n \"&TCC_EA_ATOMIC_LEVEL[26]\",\n \"&TCC_EA_ATOMIC_LEVEL[27]\",\n \"&TCC_EA_ATOMIC_LEVEL[28]\",\n \"&TCC_EA_ATOMIC_LEVEL[29]\",\n \"&TCC_EA_ATOMIC_LEVEL[30]\",\n \"&TCC_EA_ATOMIC_LEVEL[31]\"\n ]\n },\n {\n \"$add\":[\n \"&TCC_EA_ATOMIC[0]\",\n \"&TCC_EA_ATOMIC[1]\",\n \"&TCC_EA_ATOMIC[2]\",\n \"&TCC_EA_ATOMIC[3]\",\n \"&TCC_EA_ATOMIC[4]\",\n \"&TCC_EA_ATOMIC[5]\",\n \"&TCC_EA_ATOMIC[6]\",\n \"&TCC_EA_ATOMIC[7]\",\n \"&TCC_EA_ATOMIC[8]\",\n \"&TCC_EA_ATOMIC[9]\",\n \"&TCC_EA_ATOMIC[10]\",\n \"&TCC_EA_ATOMIC[11]\",\n \"&TCC_EA_ATOMIC[12]\",\n \"&TCC_EA_ATOMIC[13]\",\n \"&TCC_EA_ATOMIC[14]\",\n 
\"&TCC_EA_ATOMIC[15]\",\n \"&TCC_EA_ATOMIC[16]\",\n \"&TCC_EA_ATOMIC[17]\",\n \"&TCC_EA_ATOMIC[18]\",\n \"&TCC_EA_ATOMIC[19]\",\n \"&TCC_EA_ATOMIC[20]\",\n \"&TCC_EA_ATOMIC[21]\",\n \"&TCC_EA_ATOMIC[22]\",\n \"&TCC_EA_ATOMIC[23]\",\n \"&TCC_EA_ATOMIC[24]\",\n \"&TCC_EA_ATOMIC[25]\",\n \"&TCC_EA_ATOMIC[26]\",\n \"&TCC_EA_ATOMIC[27]\",\n \"&TCC_EA_ATOMIC[28]\",\n \"&TCC_EA_ATOMIC[29]\",\n \"&TCC_EA_ATOMIC[30]\",\n \"&TCC_EA_ATOMIC[31]\"\n ]\n }\n ]},\n null\n ]\n }\n },\n \"mean_ea_read_stall_io_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_read_stall_io_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_read_stall_io_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_read_stall_io_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_read_stall_gmi_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_read_stall_gmi_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_read_stall_gmi_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_read_stall_gmi_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 
32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_read_stall_dram_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n 
] \n }\n },\n \"min_ea_read_stall_dram_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n 
\"max_ea_read_stall_dram_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n 
\"stdDev_ea_read_stall_dram_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n 
\"mean_ea_write_stall_io_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_io_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n 
\"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_io_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_io_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_gmi_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_gmi_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_gmi_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_gmi_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"},\n 
{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_dram_credit\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_dram_credit\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_dram_credit\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_dram_credit\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"},\n {\"$toInt\": 
\"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"},\n {\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"mean_ea_write_stall_too_many\": {\n \"$avg\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n 
{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"min_ea_write_stall_too_many\": {\n \"$min\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"max_ea_write_stall_too_many\": {\n \"$max\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n 
{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n },\n \"stdDev_ea_write_stall_too_many\": {\n \"$stdDevPop\":{\n \"$divide\": [\n {\n \"$divide\": [\n {\n \"$add\": [\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"},\n {\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"},\n {\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}\n ]\n },\n 32\n ]\n },\n \"&denom\"\n ] \n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"L2 Cache Hit Rate\",\n \"Mean\": \"&mean_hit_rate\",\n \"Std Dev\": \"&stdDev_hit_rate\",\n \"Min\": \"&min_hit_rate\",\n \"Max\": \"&max_hit_rate\",\n \"Units\": \"pct\"\n },\n {\n \"Metric\": \"Req\",\n \"Mean\": \"&mean_req\",\n \"Std Dev\": \"&stdDev_req\",\n \"Min\": \"&min_req\",\n \"Max\": \"&max_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L1 - L2 Read Req\",\n \"Mean\": \"&mean_read_req\",\n \"Std Dev\": \"&stdDev_read_req\",\n \"Min\": \"&min_read_req\",\n \"Max\": \"&max_read_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L1 - L2 Write Req\",\n \"Mean\": \"&mean_write_req\",\n \"Std Dev\": \"&stdDev_write_req\",\n \"Min\": \"&min_write_req\",\n \"Max\": \"&max_write_req\",\n \"Units\": $normUnit\n },\n {\n 
\"Metric\": \"L1 - L2 Atomic Req\",\n \"Mean\": \"&mean_atomic_req\",\n \"Std Dev\": \"&stdDev_atomic_req\",\n \"Min\": \"&min_atomic_req\",\n \"Max\": \"&max_atomic_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Read Req\",\n \"Mean\": \"&mean_eaRead_req\",\n \"Std Dev\": \"&stdDev_eaRead_req\",\n \"Min\": \"&min_eaRead_req\",\n \"Max\": \"&max_eaRead_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Write Req\", \n \"Mean\": \"&mean_eaWrite_req\",\n \"Std Dev\": \"&stdDev_eaWrite_req\",\n \"Min\": \"&min_eaWrite_req\",\n \"Max\": \"&max_eaWrite_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Atomic Req\",\n \"Mean\": \"&mean_eaAtomic_req\",\n \"Std Dev\": \"&stdDev_eaAtomic_req\",\n \"Min\": \"&min_eaAtomic_req\",\n \"Max\": \"&max_eaAtomic_req\",\n \"Units\": $normUnit\n },\n {\n \"Metric\": \"L2 - EA Read Lat\",\n \"Mean\": \"&mean_eaRead_lat\",\n \"Std Dev\": \"&stdDev_eaRead_lat\",\n \"Min\": \"&min_eaRead_lat\",\n \"Max\": \"&max_eaRead_lat\",\n \"Units\": \"Cycles\"\n },\n {\n \"Metric\": \"L2 - EA Write Lat\",\n \"Mean\": \"&mean_eaWrite_lat\",\n \"Std Dev\": \"&stdDev_eaWrite_lat\",\n \"Min\": \"&min_eaWrite_lat\",\n \"Max\": \"&max_eaWrite_lat\",\n \"Units\": \"Cycles\"\n },\n {\n \"Metric\": \"L2 - EA Atomic Lat\",\n \"Mean\": \"&mean_eaAtomic_lat\",\n \"Std Dev\": \"&stdDev_eaAtomic_lat\",\n \"Min\": \"&min_eaAtomic_lat\",\n \"Max\": \"&max_eaAtomic_lat\",\n \"Units\": \"Cycles\"\n },\n {\n \"Metric\": \"L2 - EA Read Stall (IO)\",\n \"Mean\": \"&mean_ea_read_stall_io_credit\",\n \"Std Dev\": \"&stdDev_ea_read_stall_io_credit\",\n \"Min\": \"&min_ea_read_stall_io_credit\",\n \"Max\": \"&max_ea_read_stall_io_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Read Stall (GMI)\",\n \"Mean\": \"&mean_ea_read_stall_gmi_credit\",\n \"Std Dev\": \"&stdDev_ea_read_stall_gmi_credit\",\n \"Min\": \"&min_ea_read_stall_gmi_credit\",\n \"Max\": 
\"&max_ea_read_stall_gmi_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Read Stall (DRAM)\",\n \"Mean\": \"&mean_ea_read_stall_dram_credit\",\n \"Std Dev\": \"&stdDev_ea_read_stall_dram_credit\",\n \"Min\": \"&min_ea_read_stall_dram_credit\",\n \"Max\": \"&max_ea_read_stall_dram_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Stall (IO)\",\n \"Mean\": \"&mean_ea_write_stall_io_credit\",\n \"Std Dev\": \"&stdDev_ea_write_stall_io_credit\",\n \"Min\": \"&min_ea_write_stall_io_credit\",\n \"Max\": \"&max_ea_write_stall_io_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Stall (GMI)\",\n \"Mean\": \"&mean_ea_write_stall_gmi_credit\",\n \"Std Dev\": \"&stdDev_ea_write_stall_gmi_credit\",\n \"Min\": \"&min_ea_write_stall_gmi_credit\",\n \"Max\": \"&max_ea_write_stall_gmi_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Stall (DRAM)\",\n \"Mean\": \"&mean_ea_write_stall_dram_credit\",\n \"Std Dev\": \"&stdDev_ea_write_stall_dram_credit\",\n \"Min\": \"&min_ea_write_stall_dram_credit\",\n \"Max\": \"&max_ea_write_stall_dram_credit\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n },\n {\n \"Metric\": \"L2 - EA Write Starve\",\n \"Mean\": \"&mean_ea_write_stall_too_many\",\n \"Std Dev\": \"&stdDev_ea_write_stall_too_many\",\n \"Min\": \"&min_ea_write_stall_too_many\",\n \"Max\": \"&max_ea_write_stall_too_many\",\n \"Units\": {\"$concat\": [\"Cycles \", $normUnit] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "type": "table" + } + ], + "title": "Aggregate Stats (All 32 channels)", + "transformations": [], + "type": "table" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD 
HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + 
"ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 356 + }, + "id": 87, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 Cache Hit Rate (Percent) (Channel 0 - 15) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + 
"LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "light", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:850", + "Col": 1, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Hit Rate", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 356 + }, + "id": 92, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "Cache Hit Rate % (Channel 16 - 31) ", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": 
"rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + 
"ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:565", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 364 + }, + "id": 81, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Read Requests(Channel 0-15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + 
"MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:656", + "Col": 3, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 364 + }, + "id": 82, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L 2 Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + 
"GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:697", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + 
"Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 372 + }, + "id": 83, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + 
"RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:750", + "Col": 4, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 372 + }, + "id": 84, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + 
"HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 380 + }, + "id": 85, + 
"targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 0-15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLimitValue": 100, + "HighLmitLineWidth": 1, + "HighSideMargin": 0, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "100%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 80, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineValue": 105, + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + 
"ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": true, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:791", + "Col": 5, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 380 + }, + "id": 91, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L1 - L2 Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, 
+ "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 388 + }, + "id": 189, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 0 - 15) : 
$normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": 
false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 6, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 388 + }, + "id": 195, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, 
+ "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 396 + }, + "id": 191, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 0 - 15) : $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD 
HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": 
"YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 7, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Req", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 396 + }, + "id": 197, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + 
"LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 404 + }, + "id": 193, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Request (Channel 0 - 15): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 
0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": false, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 2, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": 
"object:341", + "Col": 8, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA AtomicReq", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 404 + }, + "id": 199, + "maxDataPoints": 10, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Requests (Channel 16-31): $normUnit", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupColName": "", + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + 
"MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 412 + }, + "hideTimeOverride": false, + "id": 68, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n 
} \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b0_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[0]\"]}, \n { \"$add\": [\"&TCC_HIT[0]\", \"&TCC_MISS[0]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b0_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[0]\"}, \"&denom\"] } \n },\n \"b0_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[0]\"}, \"&denom\"] } \n },\n \"b0_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[0]\"}, \"&denom\"] } \n },\n \"b0_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[0]\"}, \"&denom\"] } \n },\n \"b0_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[0]\"}, \"&denom\"] }\n },\n \"b0_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[0]\"}, \"&denom\"] } \n },\n \"b0_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[0]\"}, \"&denom\"] } \n },\n\n \"b0_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[0]\", \"&TCC_EA_RDREQ[0]\"]}, null] } },\n \"b0_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[0]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[0]\", \"&TCC_EA_WRREQ[0]\"]}, null] } },\n \"b0_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[0]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[0]\", \"&TCC_EA_ATOMIC[0]\"]}, null]}},\n\n \"b0_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[0]\"}, \"&denom\"] }},\n \"b0_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[0]\"}, \"&denom\"] }},\n\n \n \"b1_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[1]\"]}, \n { \"$add\": [\"&TCC_HIT[1]\", \"&TCC_MISS[1]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b1_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[1]\"}, \"&denom\"] } \n },\n \"b1_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[1]\"}, \"&denom\"] } \n },\n \"b1_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[1]\"}, \"&denom\"] } \n },\n \"b1_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[1]\"}, \"&denom\"] }\n },\n \"b1_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[1]\"}, \"&denom\"] } \n },\n \"b1_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[1]\"}, \"&denom\"] } \n },\n \"b1_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[1]\", \"&TCC_EA_RDREQ[1]\"]}, null] } },\n \"b1_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[1]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[1]\", \"&TCC_EA_WRREQ[1]\"]}, null] } },\n \"b1_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[1]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[1]\", \"&TCC_EA_ATOMIC[1]\"]}, null]}},\n\n \"b1_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[1]\"}, \"&denom\"] }},\n \"b1_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[1]\"}, \"&denom\"] }},\n\n\n \"b2_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[2]\"]}, \n { \"$add\": [\"&TCC_HIT[2]\", \"&TCC_MISS[2]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b2_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[2]\"}, \"&denom\"] }\n },\n \"b2_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[2]\"}, \"&denom\"] } \n },\n \"b2_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[2]\"}, \"&denom\"] }\n },\n \"b2_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[2]\"}, \"&denom\"] }\n },\n \"b2_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[2]\"}, \"&denom\"] }\n },\n \"b2_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[2]\", \"&TCC_EA_RDREQ[2]\"]}, null] } },\n \"b2_eaWriteLat\": { 
\"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[2]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[2]\", \"&TCC_EA_WRREQ[2]\"]}, null] } },\n \"b2_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[2]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[2]\", \"&TCC_EA_ATOMIC[2]\"]}, null]}},\n\n \"b2_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[2]\"}, \"&denom\"] }},\n \"b2_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[2]\"}, \"&denom\"] }},\n\n\n \n \"b3_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[3]\"]}, \n { \"$add\": [\"&TCC_HIT[3]\", \"&TCC_MISS[3]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b3_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[3]\"}, \"&denom\"] } \n },\n \"b3_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[3]\"}, \"&denom\"] } \n },\n \"b3_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[3]\"}, \"&denom\"] }\n },\n \"b3_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[3]\"}, \"&denom\"] }\n },\n \"b3_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[3]\"}, \"&denom\"] }\n },\n 
\"b3_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[3]\"}, \"&denom\"] }\n },\n \"b3_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[3]\"}, \"&denom\"] } \n },\n \"b3_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[3]\", \"&TCC_EA_RDREQ[3]\"]}, null] } },\n \"b3_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[3]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[3]\", \"&TCC_EA_WRREQ[3]\"]}, null] } },\n \"b3_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[3]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[3]\", \"&TCC_EA_ATOMIC[3]\"]}, null]}},\n\n \"b3_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[3]\"}, \"&denom\"] }},\n \"b3_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[3]\"}, \"&denom\"] }},\n\n\n \n \"b4_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[4]\"]}, \n { \"$add\": [\"&TCC_HIT[4]\", \"&TCC_MISS[4]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b4_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[4]\"}, \"&denom\"] } \n },\n \"b4_readReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[4]\"}, \"&denom\"] } \n },\n \"b4_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[4]\"}, \"&denom\"] }\n },\n \"b4_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[4]\"}, \"&denom\"] } \n },\n \"b4_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[4]\"}, \"&denom\"] } \n },\n \"b4_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[4]\", \"&TCC_EA_RDREQ[4]\"]}, null] } },\n \"b4_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[4]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[4]\", \"&TCC_EA_WRREQ[4]\"]}, null] } },\n \"b4_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[4]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[4]\", \"&TCC_EA_ATOMIC[4]\"]}, null]}},\n\n \"b4_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[4]\"}, \"&denom\"] }},\n \"b4_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[4]\"}, \"&denom\"] }},\n\n\n 
\n \"b5_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[5]\"]}, \n { \"$add\": [\"&TCC_HIT[5]\", \"&TCC_MISS[5]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b5_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[5]\"}, \"&denom\"] } \n },\n \"b5_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[5]\"}, \"&denom\"] } \n },\n \"b5_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[5]\"}, \"&denom\"] } \n },\n \"b5_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[5]\"}, \"&denom\"] } \n },\n \"b5_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[5]\"}, \"&denom\"] } \n },\n \"b5_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[5]\", \"&TCC_EA_RDREQ[5]\"]}, null] } },\n \"b5_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[5]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[5]\", \"&TCC_EA_WRREQ[5]\"]}, null] } },\n \"b5_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[5]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[5]\", \"&TCC_EA_ATOMIC[5]\"]}, null]}},\n\n \"b5_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[5]\"}, \"&denom\"] }},\n 
\"b5_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[5]\"}, \"&denom\"] }},\n \"b5_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[5]\"}, \"&denom\"] }},\n\n\n \n \"b6_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[6]\"]}, \n { \"$add\": [\"&TCC_HIT[6]\", \"&TCC_MISS[6]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b6_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[6]\"}, \"&denom\"] } \n },\n \"b6_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[6]\"}, \"&denom\"] } \n },\n \"b6_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[6]\"}, \"&denom\"] } \n },\n \"b6_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[6]\"}, \"&denom\"] } \n },\n \"b6_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[6]\"}, \"&denom\"] }\n },\n \"b6_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[6]\"}, \"&denom\"] } \n },\n \"b6_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[6]\", \"&TCC_EA_RDREQ[6]\"]}, null] } },\n \"b6_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[6]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[6]\", \"&TCC_EA_WRREQ[6]\"]}, null] } },\n \"b6_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[6]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[6]\", \"&TCC_EA_ATOMIC[6]\"]}, null]}},\n\n \"b6_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n 
\"b6_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[6]\"}, \"&denom\"] }},\n \"b6_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[6]\"}, \"&denom\"] }},\n\n\n \n \"b7_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[7]\"]}, \n { \"$add\": [\"&TCC_HIT[7]\", \"&TCC_MISS[7]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b7_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[7]\"}, \"&denom\"] } \n },\n \"b7_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[7]\"}, \"&denom\"] } \n },\n \"b7_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[7]\"}, \"&denom\"] } \n },\n \"b7_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[7]\"}, \"&denom\"] } \n },\n \"b7_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[7]\"}, \"&denom\"] }\n },\n \"b7_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[7]\"}, \"&denom\"] } \n },\n \"b7_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[7]\", \"&TCC_EA_RDREQ[7]\"]}, null] } },\n \"b7_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_WRREQ[7]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[7]\", \"&TCC_EA_WRREQ[7]\"]}, null] } },\n \"b7_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[7]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[7]\", \"&TCC_EA_ATOMIC[7]\"]}, null]}},\n\n \"b7_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[7]\"}, \"&denom\"] }},\n \"b7_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[7]\"}, \"&denom\"] }},\n\n\n \n \"b8_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[8]\"]}, \n { \"$add\": [\"&TCC_HIT[8]\", \"&TCC_MISS[8]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b8_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[8]\"}, \"&denom\"] } \n },\n \"b8_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[8]\"}, \"&denom\"] } \n },\n \"b8_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[8]\"}, \"&denom\"] } \n },\n \"b8_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[8]\"}, \"&denom\"] } \n },\n \"b8_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[8]\"}, \"&denom\"] } \n },\n \"b8_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[8]\", \"&TCC_EA_RDREQ[8]\"]}, null] } },\n \"b8_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[8]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[8]\", \"&TCC_EA_WRREQ[8]\"]}, null] } },\n \"b8_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[8]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[8]\", \"&TCC_EA_ATOMIC[8]\"]}, null]}},\n\n \"b8_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[8]\"}, \"&denom\"] }},\n \"b8_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[8]\"}, \"&denom\"] }},\n\n\n \n \"b9_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[9]\"]}, \n { \"$add\": [\"&TCC_HIT[9]\", \"&TCC_MISS[9]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b9_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[9]\"}, \"&denom\"] } \n },\n \"b9_readReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_READ[9]\"}, \"&denom\"] } \n },\n \"b9_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[9]\"}, \"&denom\"] } \n },\n \"b9_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[9]\"}, \"&denom\"] } \n },\n \"b9_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[9]\"}, \"&denom\"] } \n },\n \"b9_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[9]\", \"&TCC_EA_RDREQ[9]\"]}, null] } },\n \"b9_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[9]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[9]\", \"&TCC_EA_WRREQ[9]\"]}, null] } },\n \"b9_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[9]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[9]\", \"&TCC_EA_ATOMIC[9]\"]}, null]}},\n\n \"b9_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[9]\"}, \"&denom\"] }},\n \"b9_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[9]\"}, \"&denom\"] }},\n\n\n \n 
\"b10_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[10]\"]}, \n { \"$add\": [\"&TCC_HIT[10]\", \"&TCC_MISS[10]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b10_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[10]\"}, \"&denom\"] } \n },\n \"b10_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[10]\"}, \"&denom\"] } \n },\n \"b10_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[10]\"}, \"&denom\"] } \n },\n \"b10_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[10]\"}, \"&denom\"] } \n },\n \"b10_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[10]\"}, \"&denom\"] } \n },\n \"b10_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[10]\", \"&TCC_EA_RDREQ[10]\"]}, null] } },\n \"b10_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[10]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[10]\", \"&TCC_EA_WRREQ[10]\"]}, null] } },\n \"b10_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[10]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[10]\", \"&TCC_EA_ATOMIC[10]\"]}, null]}},\n\n \"b10_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_WRREQ_IO_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[10]\"}, \"&denom\"] }},\n \"b10_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[10]\"}, \"&denom\"] }},\n\n\n \n \"b11_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[11]\"]}, \n { \"$add\": [\"&TCC_HIT[11]\", \"&TCC_MISS[11]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b11_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[11]\"}, \"&denom\"] } \n },\n \"b11_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[11]\"}, \"&denom\"] } \n },\n \"b11_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[11]\"}, \"&denom\"] } \n },\n \"b11_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[11]\"}, \"&denom\"] } \n },\n \"b11_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[11]\"}, \"&denom\"] } \n },\n \"b11_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[11]\", \"&TCC_EA_RDREQ[11]\"]}, null] } },\n \"b11_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[11]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[11]\", \"&TCC_EA_WRREQ[11]\"]}, null] } },\n \"b11_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[11]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[11]\", \"&TCC_EA_ATOMIC[11]\"]}, null]}},\n\n \"b11_ea_read_stall_io_credit\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[11]\"}, \"&denom\"] }},\n \"b11_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[11]\"}, \"&denom\"] }},\n\n\n \n \"b12_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[12]\"]}, \n { \"$add\": [\"&TCC_HIT[12]\", \"&TCC_MISS[12]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b12_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[12]\"}, \"&denom\"] } \n },\n \"b12_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[12]\"}, \"&denom\"] } \n },\n \"b12_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[12]\"}, \"&denom\"] } \n },\n \"b12_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[12]\"}, \"&denom\"] } \n },\n \"b12_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[12]\"}, \"&denom\"] } \n },\n \"b12_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[12]\", 0]}, {\"$divide\": 
[\"&TCC_EA_RDREQ_LEVEL[12]\", \"&TCC_EA_RDREQ[12]\"]}, null] } },\n \"b12_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[12]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[12]\", \"&TCC_EA_WRREQ[12]\"]}, null] } },\n \"b12_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[12]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[12]\", \"&TCC_EA_ATOMIC[12]\"]}, null]}},\n\n \"b12_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[12]\"}, \"&denom\"] }},\n \"b12_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[12]\"}, \"&denom\"] }},\n\n\n \n \"b13_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[13]\"]}, \n { \"$add\": [\"&TCC_HIT[13]\", \"&TCC_MISS[13]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b13_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[13]\"}, \"&denom\"] } \n },\n \"b13_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[13]\"}, \"&denom\"] } \n },\n \"b13_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[13]\"}, \"&denom\"] } \n },\n \"b13_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[13]\"}, \"&denom\"] 
} \n },\n \"b13_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[13]\"}, \"&denom\"] } \n },\n \"b13_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[13]\"}, \"&denom\"] }\n },\n \"b13_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[13]\"}, \"&denom\"] } \n },\n \"b13_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[13]\", \"&TCC_EA_RDREQ[13]\"]}, null] } },\n \"b13_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[13]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[13]\", \"&TCC_EA_WRREQ[13]\"]}, null] } },\n \"b13_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[13]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[13]\", \"&TCC_EA_ATOMIC[13]\"]}, null]}},\n\n \"b13_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[13]\"}, \"&denom\"] }},\n \"b13_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[13]\"}, \"&denom\"] }},\n\n\n \n \"b14_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[14]\", \"&TCC_MISS[14]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[14]\"]}, \n { \"$add\": [\"&TCC_HIT[14]\", 
\"&TCC_MISS[14]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b14_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[14]\"}, \"&denom\"] } \n },\n \"b14_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[14]\"}, \"&denom\"] } \n },\n \"b14_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[14]\"}, \"&denom\"] } \n },\n \"b14_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[14]\"}, \"&denom\"] } \n },\n \"b14_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[14]\"}, \"&denom\"] } \n },\n \"b14_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[14]\", \"&TCC_EA_RDREQ[14]\"]}, null] } },\n \"b14_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[14]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[14]\", \"&TCC_EA_WRREQ[14]\"]}, null] } },\n \"b14_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[14]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[14]\", \"&TCC_EA_ATOMIC[14]\"]}, null]}},\n\n \"b14_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_dram_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[14]\"}, \"&denom\"] }},\n \"b14_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[14]\"}, \"&denom\"] }},\n\n\n \n \"b15_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[15]\"]}, \n { \"$add\": [\"&TCC_HIT[15]\", \"&TCC_MISS[15]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b15_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[15]\"}, \"&denom\"] } \n },\n \"b15_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[15]\"}, \"&denom\"] } \n },\n \"b15_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[15]\"}, \"&denom\"] } \n },\n \"b15_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[15]\"}, \"&denom\"] } \n },\n \"b15_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[15]\"}, \"&denom\"] } \n },\n \"b15_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[15]\"}, \"&denom\"] }\n },\n \"b15_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[15]\", \"&TCC_EA_RDREQ[15]\"]}, null] } },\n \"b15_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[15]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[15]\", \"&TCC_EA_WRREQ[15]\"]}, null] } },\n \"b15_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[15]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[15]\", \"&TCC_EA_ATOMIC[15]\"]}, null]}},\n\n \"b15_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n 
\"b15_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[15]\"}, \"&denom\"] }},\n \"b15_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[15]\"}, \"&denom\"] }}\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"0\",\n \"Hit Rate\": \"&b0_hitRate\",\n \"Req\": \"&b0_req\",\n \"Read Req\": \"&b0_readReq\",\n \"Write Req\": \"&b0_writeReq\",\n \"AtomicReq\": \"&b0_atomicReq\",\n \"EA Read Req\": \"&b0_eaReadReq\",\n \"EA Write Req\": \"&b0_eaWriteReq\",\n \"EA AtomicReq\": \"&b0_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b0_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b0_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b0_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b0_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b0_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b0_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b0_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b0_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b0_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b0_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n },\n {\n \"Channel\": \"1\",\n \"Hit Rate\": \"&b1_hitRate\",\n \"Req\": \"&b1_req\",\n \"Read Req\": \"&b1_readReq\",\n \"Write Req\": \"&b1_writeReq\",\n \"AtomicReq\": \"&b1_atomicReq\",\n \"EA Read Req\": \"&b1_eaReadReq\",\n \"EA Write Req\": \"&b1_eaWriteReq\",\n \"EA AtomicReq\": \"&b1_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b1_eaReadLat\",\n \"EA Write Lat - cycles\": 
\"&b1_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b1_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b1_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b1_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b1_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b1_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b1_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b1_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b1_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n },\n {\n \"Channel\": \"2\",\n \"Hit Rate\": \"&b2_hitRate\",\n \"Req\": \"&b2_req\",\n \"Read Req\": \"&b2_readReq\",\n \"Write Req\": \"&b2_writeReq\",\n \"AtomicReq\": \"&b2_atomicReq\",\n \"EA Read Req\": \"&b2_eaReadReq\",\n \"EA Write Req\": \"&b2_eaWriteReq\",\n \"EA AtomicReq\": \"&b2_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b2_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b2_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b2_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b2_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b2_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b2_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b2_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b2_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b2_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b2_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"3\",\n \"Hit Rate\": \"&b3_hitRate\",\n \"Req\": \"&b3_req\",\n \"Read Req\": \"&b3_readReq\",\n \"Write Req\": \"&b3_writeReq\",\n \"AtomicReq\": \"&b3_atomicReq\",\n \"EA Read Req\": \"&b3_eaReadReq\",\n \"EA Write Req\": \"&b3_eaWriteReq\",\n \"EA AtomicReq\": \"&b3_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b3_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b3_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b3_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b3_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": 
\"&b3_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b3_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b3_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b3_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b3_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b3_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"4\",\n \"Hit Rate\": \"&b4_hitRate\",\n \"Req\": \"&b4_req\",\n \"Read Req\": \"&b4_readReq\",\n \"Write Req\": \"&b4_writeReq\",\n \"AtomicReq\": \"&b4_atomicReq\",\n \"EA Read Req\": \"&b4_eaReadReq\",\n \"EA Write Req\": \"&b4_eaWriteReq\",\n \"EA AtomicReq\": \"&b4_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b4_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b4_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b4_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b4_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b4_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b4_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b4_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b4_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b4_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b4_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"5\",\n \"Hit Rate\": \"&b5_hitRate\",\n \"Req\": \"&b5_req\",\n \"Read Req\": \"&b5_readReq\",\n \"Write Req\": \"&b5_writeReq\",\n \"AtomicReq\": \"&b5_atomicReq\",\n \"EA Read Req\": \"&b5_eaReadReq\",\n \"EA Write Req\": \"&b5_eaWriteReq\",\n \"EA AtomicReq\": \"&b5_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b5_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b5_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b5_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b5_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b5_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b5_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b5_ea_write_stall_io_credit\",\n 
\"EA Write Stall - GMI\": \"&b5_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b5_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b5_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"6\",\n \"Hit Rate\": \"&b6_hitRate\",\n \"Req\": \"&b6_req\",\n \"Read Req\": \"&b6_readReq\",\n \"Write Req\": \"&b6_writeReq\",\n \"AtomicReq\": \"&b6_atomicReq\",\n \"EA Read Req\": \"&b6_eaReadReq\",\n \"EA Write Req\": \"&b6_eaWriteReq\",\n \"EA AtomicReq\": \"&b6_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b6_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b6_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b6_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b6_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b6_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b6_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b6_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b6_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b6_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b6_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"7\",\n \"Hit Rate\": \"&b7_hitRate\",\n \"Req\": \"&b7_req\",\n \"Read Req\": \"&b7_readReq\",\n \"Write Req\": \"&b7_writeReq\",\n \"AtomicReq\": \"&b7_atomicReq\",\n \"EA Read Req\": \"&b7_eaReadReq\",\n \"EA Write Req\": \"&b7_eaWriteReq\",\n \"EA AtomicReq\": \"&b7_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b7_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b7_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b7_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b7_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b7_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b7_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b7_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b7_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b7_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": 
\"&b7_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"8\",\n \"Hit Rate\": \"&b8_hitRate\",\n \"Req\": \"&b8_req\",\n \"Read Req\": \"&b8_readReq\",\n \"Write Req\": \"&b8_writeReq\",\n \"AtomicReq\": \"&b8_atomicReq\",\n \"EA Read Req\": \"&b8_eaReadReq\",\n \"EA Write Req\": \"&b8_eaWriteReq\",\n \"EA AtomicReq\": \"&b8_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b8_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b8_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b8_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b8_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b8_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b8_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b8_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b8_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b8_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b8_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"9\",\n \"Hit Rate\": \"&b9_hitRate\",\n \"Req\": \"&b9_req\",\n \"Read Req\": \"&b9_readReq\",\n \"Write Req\": \"&b9_writeReq\",\n \"AtomicReq\": \"&b9_atomicReq\",\n \"EA Read Req\": \"&b9_eaReadReq\",\n \"EA Write Req\": \"&b9_eaWriteReq\",\n \"EA AtomicReq\": \"&b9_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b9_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b9_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b9_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b9_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b9_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b9_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b9_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b9_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b9_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b9_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"10\",\n \"Hit Rate\": \"&b10_hitRate\",\n \"Req\": \"&b10_req\",\n \"Read 
Req\": \"&b10_readReq\",\n \"Write Req\": \"&b10_writeReq\",\n \"AtomicReq\": \"&b10_atomicReq\",\n \"EA Read Req\": \"&b10_eaReadReq\",\n \"EA Write Req\": \"&b10_eaWriteReq\",\n \"EA AtomicReq\": \"&b10_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b10_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b10_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b10_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b10_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b10_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b10_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b10_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b10_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b10_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b10_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"11\",\n \"Hit Rate\": \"&b11_hitRate\",\n \"Req\": \"&b11_req\",\n \"Read Req\": \"&b11_readReq\",\n \"Write Req\": \"&b11_writeReq\",\n \"AtomicReq\": \"&b11_atomicReq\",\n \"EA Read Req\": \"&b11_eaReadReq\",\n \"EA Write Req\": \"&b11_eaWriteReq\",\n \"EA AtomicReq\": \"&b11_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b11_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b11_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b11_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b11_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b11_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b11_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b11_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b11_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b11_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b11_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"12\",\n \"Hit Rate\": \"&b12_hitRate\",\n \"Req\": \"&b12_req\",\n \"Read Req\": \"&b12_readReq\",\n \"Write Req\": \"&b12_writeReq\",\n \"AtomicReq\": \"&b12_atomicReq\",\n \"EA Read Req\": 
\"&b12_eaReadReq\",\n \"EA Write Req\": \"&b12_eaWriteReq\",\n \"EA AtomicReq\": \"&b12_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b12_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b12_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b12_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b12_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b12_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b12_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b12_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b12_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b12_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b12_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"13\",\n \"Hit Rate\": \"&b13_hitRate\",\n \"Req\": \"&b13_req\",\n \"Read Req\": \"&b13_readReq\",\n \"Write Req\": \"&b13_writeReq\",\n \"AtomicReq\": \"&b13_atomicReq\",\n \"EA Read Req\": \"&b13_eaReadReq\",\n \"EA Write Req\": \"&b13_eaWriteReq\",\n \"EA AtomicReq\": \"&b13_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b13_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b13_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b13_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b13_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b13_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b13_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b13_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b13_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b13_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b13_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"14\",\n \"Hit Rate\": \"&b14_hitRate\",\n \"Req\": \"&b14_req\",\n \"Read Req\": \"&b14_readReq\",\n \"Write Req\": \"&b14_writeReq\",\n \"AtomicReq\": \"&b14_atomicReq\",\n \"EA Read Req\": \"&b14_eaReadReq\",\n \"EA Write Req\": \"&b14_eaWriteReq\",\n \"EA AtomicReq\": \"&b14_eaAtomicReq\",\n \"EA Read Lat 
- cycles\": \"&b14_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b14_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b14_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b14_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b14_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b14_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b14_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b14_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b14_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b14_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"15\",\n \"Hit Rate\": \"&b15_hitRate\",\n \"Req\": \"&b15_req\",\n \"Read Req\": \"&b15_readReq\",\n \"Write Req\": \"&b15_writeReq\",\n \"AtomicReq\": \"&b15_atomicReq\",\n \"EA Read Req\": \"&b15_eaReadReq\",\n \"EA Write Req\": \"&b15_eaWriteReq\",\n \"EA AtomicReq\": \"&b15_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b15_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b15_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b15_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b15_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b15_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b15_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b15_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b15_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b15_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b15_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + 
"DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + 
"ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:341", + "Col": 9, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Lat - cycles", + "Selected": true + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 412 + }, + "id": 70, + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&End_Timestamp\", \"&Start_Timestamp\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"b16_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[16]\"]}, \n { \"$add\": [\"&TCC_HIT[16]\", \"&TCC_MISS[16]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b16_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[16]\"}, \"&denom\"] } \n },\n \"b16_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[16]\"}, \"&denom\"] } \n },\n \"b16_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[16]\"}, \"&denom\"] } \n },\n \"b16_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[16]\"}, \"&denom\"] } \n },\n \"b16_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[16]\"}, 
\"&denom\"] }\n },\n \"b16_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[16]\"}, \"&denom\"] } \n },\n \"b16_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[16]\"}, \"&denom\"] } \n },\n\n \"b16_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[16]\", \"&TCC_EA_RDREQ[16]\"]}, null] } },\n \"b16_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[16]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[16]\", \"&TCC_EA_WRREQ[16]\"]}, null] } },\n \"b16_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[16]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[16]\", \"&TCC_EA_ATOMIC[16]\"]}, null]}},\n \"b16_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[16]\"}, \"&denom\"] }},\n \"b16_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[16]\"}, \"&denom\"] }},\n\n \n \"b17_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[17]\"]}, \n { \"$add\": [\"&TCC_HIT[17]\", \"&TCC_MISS[17]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b17_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_REQ[17]\"}, \"&denom\"] } \n },\n \"b17_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[17]\"}, \"&denom\"] } \n },\n \"b17_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[17]\"}, \"&denom\"] } \n },\n \"b17_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[17]\"}, \"&denom\"] }\n },\n \"b17_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[17]\"}, \"&denom\"] } \n },\n \"b17_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[17]\"}, \"&denom\"] } \n },\n \"b17_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[17]\", \"&TCC_EA_RDREQ[17]\"]}, null] } },\n \"b17_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[17]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[17]\", \"&TCC_EA_WRREQ[17]\"]}, null] } },\n \"b17_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[17]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[17]\", \"&TCC_EA_ATOMIC[17]\"]}, null]}},\n \"b17_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[17]\"}, \"&denom\"] }},\n \"b17_ea_write_stall_too_many\": { 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[17]\"}, \"&denom\"] }},\n\n \n \"b18_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[18]\"]}, \n { \"$add\": [\"&TCC_HIT[18]\", \"&TCC_MISS[18]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b18_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[18]\"}, \"&denom\"] }\n },\n \"b18_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[18]\"}, \"&denom\"] } \n },\n \"b18_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[18]\"}, \"&denom\"] }\n },\n \"b18_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[18]\"}, \"&denom\"] }\n },\n \"b18_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[18]\"}, \"&denom\"] }\n },\n \"b18_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[18]\", \"&TCC_EA_RDREQ[18]\"]}, null] } },\n \"b18_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[18]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[18]\", \"&TCC_EA_WRREQ[18]\"]}, null] } },\n \"b18_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[18]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[18]\", \"&TCC_EA_ATOMIC[18]\"]}, null]}},\n \"b18_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n 
\"b18_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[18]\"}, \"&denom\"] }},\n \"b18_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[18]\"}, \"&denom\"] }},\n\n \n \"b19_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[19]\"]}, \n { \"$add\": [\"&TCC_HIT[19]\", \"&TCC_MISS[19]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b19_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[19]\"}, \"&denom\"] } \n },\n \"b19_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[19]\"}, \"&denom\"] } \n },\n \"b19_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[19]\"}, \"&denom\"] }\n },\n \"b19_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[19]\"}, \"&denom\"] }\n },\n \"b19_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[19]\"}, \"&denom\"] }\n },\n \"b19_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[19]\"}, \"&denom\"] } \n },\n \"b19_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[19]\", \"&TCC_EA_RDREQ[19]\"]}, null] } },\n \"b19_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[19]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[19]\", \"&TCC_EA_WRREQ[19]\"]}, null] } },\n \"b19_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[19]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[19]\", 
\"&TCC_EA_ATOMIC[19]\"]}, null]}},\n \"b19_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[19]\"}, \"&denom\"] }},\n \"b19_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[19]\"}, \"&denom\"] }},\n\n \n \"b20_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[20]\"]}, \n { \"$add\": [\"&TCC_HIT[20]\", \"&TCC_MISS[20]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b20_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[20]\"}, \"&denom\"] } \n },\n \"b20_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[20]\"}, \"&denom\"] } \n },\n \"b20_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[20]\"}, \"&denom\"] }\n },\n \"b20_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[20]\"}, \"&denom\"] } \n },\n \"b20_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[20]\"}, \"&denom\"] } \n },\n \"b20_eaReadLat\": { \"$avg\": 
{\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[20]\", \"&TCC_EA_RDREQ[20]\"]}, null] } },\n \"b20_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[20]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[20]\", \"&TCC_EA_WRREQ[20]\"]}, null] } },\n \"b20_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[20]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[20]\", \"&TCC_EA_ATOMIC[20]\"]}, null]}},\n \"b20_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[20]\"}, \"&denom\"] }},\n \"b20_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[20]\"}, \"&denom\"] }},\n\n \n\n \"b21_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[21]\"]}, \n { \"$add\": [\"&TCC_HIT[21]\", \"&TCC_MISS[21]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b21_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[21]\"}, \"&denom\"] } \n },\n \"b21_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[21]\"}, \"&denom\"] } \n },\n \"b21_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[21]\"}, \"&denom\"] } \n },\n \"b21_atomicReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[21]\"}, \"&denom\"] } \n },\n \"b21_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[21]\"}, \"&denom\"] } \n },\n \"b21_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[21]\", \"&TCC_EA_RDREQ[21]\"]}, null] } },\n \"b21_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[21]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[21]\", \"&TCC_EA_WRREQ[21]\"]}, null] } },\n \"b21_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[21]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[21]\", \"&TCC_EA_ATOMIC[21]\"]}, null]}},\n \"b21_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[21]\"}, \"&denom\"] }},\n \"b21_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[21]\"}, \"&denom\"] }},\n\n \n\n \"b22_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[22]\"]}, \n { \"$add\": [\"&TCC_HIT[22]\", \"&TCC_MISS[22]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b22_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[22]\"}, \"&denom\"] } \n },\n \"b22_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[22]\"}, \"&denom\"] } \n },\n \"b22_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[22]\"}, \"&denom\"] } \n },\n \"b22_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[22]\"}, \"&denom\"] } \n },\n \"b22_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[22]\"}, \"&denom\"] }\n },\n \"b22_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[22]\"}, \"&denom\"] } \n },\n \"b22_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[22]\", \"&TCC_EA_RDREQ[22]\"]}, null] } },\n \"b22_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[22]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[22]\", \"&TCC_EA_WRREQ[22]\"]}, null] } },\n \"b22_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[22]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[22]\", \"&TCC_EA_ATOMIC[22]\"]}, null]}},\n \"b22_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[22]\"}, \"&denom\"] }},\n 
\"b22_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[22]\"}, \"&denom\"] }},\n \"b22_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[22]\"}, \"&denom\"] }},\n\n \n\n \"b23_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[23]\"]}, \n { \"$add\": [\"&TCC_HIT[23]\", \"&TCC_MISS[23]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b23_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[23]\"}, \"&denom\"] } \n },\n \"b23_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[23]\"}, \"&denom\"] } \n },\n \"b23_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[23]\"}, \"&denom\"] } \n },\n \"b23_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[23]\"}, \"&denom\"] } \n },\n \"b23_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[23]\"}, \"&denom\"] }\n },\n \"b23_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[23]\"}, \"&denom\"] } \n },\n \"b23_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[23]\", \"&TCC_EA_RDREQ[23]\"]}, null] } },\n \"b23_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[23]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[23]\", \"&TCC_EA_WRREQ[23]\"]}, null] } },\n \"b23_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[23]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[23]\", \"&TCC_EA_ATOMIC[23]\"]}, null]}},\n \"b23_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_EA_RDREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[23]\"}, \"&denom\"] }},\n \"b23_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[23]\"}, \"&denom\"] }},\n\n \n \"b24_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[24]\"]}, \n { \"$add\": [\"&TCC_HIT[24]\", \"&TCC_MISS[24]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b24_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[24]\"}, \"&denom\"] } \n },\n \"b24_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[24]\"}, \"&denom\"] } \n },\n \"b24_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[24]\"}, \"&denom\"] } \n },\n \"b24_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[24]\"}, \"&denom\"] } \n },\n \"b24_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[24]\"}, \"&denom\"] } \n },\n \"b24_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[24]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[24]\", \"&TCC_EA_RDREQ[24]\"]}, null] } },\n \"b24_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[24]\", 0]}, {\"$divide\": 
[\"&TCC_EA_WRREQ_LEVEL[24]\", \"&TCC_EA_WRREQ[24]\"]}, null] } },\n \"b24_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[24]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[24]\", \"&TCC_EA_ATOMIC[24]\"]}, null]}},\n \"b24_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[24]\"}, \"&denom\"] }},\n \"b24_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[24]\"}, \"&denom\"] }},\n\n \n \"b25_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[25]\"]}, \n { \"$add\": [\"&TCC_HIT[25]\", \"&TCC_MISS[25]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b25_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[25]\"}, \"&denom\"] } \n },\n \"b25_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[25]\"}, \"&denom\"] } \n },\n \"b25_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[25]\"}, \"&denom\"] } \n },\n \"b25_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaWriteReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[25]\"}, \"&denom\"] } \n },\n \"b25_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[25]\"}, \"&denom\"] } \n },\n \"b25_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[25]\", \"&TCC_EA_RDREQ[25]\"]}, null] } },\n \"b25_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[25]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[25]\", \"&TCC_EA_WRREQ[25]\"]}, null] } },\n \"b25_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[25]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[25]\", \"&TCC_EA_ATOMIC[25]\"]}, null]}},\n \"b25_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[25]\"}, \"&denom\"] }},\n \"b25_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[25]\"}, \"&denom\"] }},\n\n \n \"b26_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[26]\"]}, \n { \"$add\": [\"&TCC_HIT[26]\", \"&TCC_MISS[26]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b26_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[26]\"}, \"&denom\"] } \n },\n \"b26_readReq\": {\n 
\"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[26]\"}, \"&denom\"] } \n },\n \"b26_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[26]\"}, \"&denom\"] } \n },\n \"b26_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[26]\"}, \"&denom\"] } \n },\n \"b26_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[26]\"}, \"&denom\"] } \n },\n \"b26_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[26]\", \"&TCC_EA_RDREQ[26]\"]}, null] } },\n \"b26_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[26]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[26]\", \"&TCC_EA_WRREQ[26]\"]}, null] } },\n \"b26_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[26]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[26]\", \"&TCC_EA_ATOMIC[26]\"]}, null]}},\n \"b26_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[26]\"}, \"&denom\"] }},\n \"b26_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": 
\"&TCC_TOO_MANY_EA_WRREQS_STALL[26]\"}, \"&denom\"] }},\n\n \n \"b27_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[27]\"]}, \n { \"$add\": [\"&TCC_HIT[27]\", \"&TCC_MISS[27]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b27_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[27]\"}, \"&denom\"] } \n },\n \"b27_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[27]\"}, \"&denom\"] } \n },\n \"b27_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[27]\"}, \"&denom\"] } \n },\n \"b27_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[27]\"}, \"&denom\"] } \n },\n \"b27_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[27]\"}, \"&denom\"] } \n },\n \"b27_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[27]\", \"&TCC_EA_RDREQ[27]\"]}, null] } },\n \"b27_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[27]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[27]\", \"&TCC_EA_WRREQ[27]\"]}, null] } },\n \"b27_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[27]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[27]\", \"&TCC_EA_ATOMIC[27]\"]}, null]}},\n \"b27_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_io_credit\": { \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[27]\"}, \"&denom\"] }},\n \"b27_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[27]\"}, \"&denom\"] }},\n\n \n \"b28_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[28]\"]}, \n { \"$add\": [\"&TCC_HIT[28]\", \"&TCC_MISS[28]\"] }\n ] \n },\n null \n ] \n }\n },\n \"b28_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[28]\"}, \"&denom\"] } \n },\n \"b28_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[28]\"}, \"&denom\"] } \n },\n \"b28_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[28]\"}, \"&denom\"] } \n },\n \"b28_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[28]\"}, \"&denom\"] } \n },\n \"b28_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[28]\"}, \"&denom\"] } \n },\n \"b28_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[28]\", \"&TCC_EA_RDREQ[28]\"]}, null] } },\n \"b28_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[28]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[28]\", \"&TCC_EA_WRREQ[28]\"]}, null] } },\n \"b28_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[28]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[28]\", \"&TCC_EA_ATOMIC[28]\"]}, null]}},\n 
\"b28_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[28]\"}, \"&denom\"] }},\n \"b28_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[28]\"}, \"&denom\"] }},\n\n \n \"b29_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[29]\"]}, \n { \"$add\": [\"&TCC_HIT[29]\", \"&TCC_MISS[29]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b29_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[29]\"}, \"&denom\"] } \n },\n \"b29_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[29]\"}, \"&denom\"] } \n },\n \"b29_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[29]\"}, \"&denom\"] } \n },\n \"b29_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[29]\"}, \"&denom\"] } \n },\n \"b29_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[29]\"}, \"&denom\"] }\n },\n \"b29_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[29]\"}, \"&denom\"] } \n },\n \"b29_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": 
[\"&TCC_EA_RDREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[29]\", \"&TCC_EA_RDREQ[29]\"]}, null] } },\n \"b29_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[29]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[29]\", \"&TCC_EA_WRREQ[29]\"]}, null] } },\n \"b29_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[29]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[29]\", \"&TCC_EA_ATOMIC[29]\"]}, null]}},\n \"b29_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[29]\"}, \"&denom\"] }},\n \"b29_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[29]\"}, \"&denom\"] }},\n\n \n \"b30_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, \"&TCC_HIT[30]\"]}, \n { \"$add\": [\"&TCC_HIT[30]\", \"&TCC_MISS[30]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b30_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[30]\"}, \"&denom\"] } \n },\n \"b30_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[30]\"}, \"&denom\"] } \n },\n \"b30_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[30]\"}, \"&denom\"] } \n },\n \"b30_atomicReq\": {\n \"$avg\":{ 
\"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[30]\"}, \"&denom\"] } \n },\n \"b30_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[30]\"}, \"&denom\"] } \n },\n \"b30_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[30]\", \"&TCC_EA_RDREQ[30]\"]}, null] } },\n \"b30_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[30]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[30]\", \"&TCC_EA_WRREQ[30]\"]}, null] } },\n \"b30_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[30]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[30]\", \"&TCC_EA_ATOMIC[30]\"]}, null]}},\n \"b30_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[30]\"}, \"&denom\"] }},\n \"b30_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[30]\"}, \"&denom\"] }},\n\n \n \"b31_hitRate\": {\n \"$avg\":{ \n \"$cond\": [ \n {\"$ne\": [{ \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }, 0]}, \n {\"$divide\": [\n { \"$multiply\": [100, 
\"&TCC_HIT[31]\"]}, \n { \"$add\": [\"&TCC_HIT[31]\", \"&TCC_MISS[31]\"] }\n ] \n },\n null\n ] \n }\n },\n \"b31_req\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_REQ[31]\"}, \"&denom\"] } \n },\n \"b31_readReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_READ[31]\"}, \"&denom\"] } \n },\n \"b31_writeReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_WRITE[31]\"}, \"&denom\"] } \n },\n \"b31_atomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_ATOMIC[31]\"}, \"&denom\"] } \n },\n \"b31_eaReadReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaWriteReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ[31]\"}, \"&denom\"] } \n },\n \"b31_eaAtomicReq\": {\n \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_ATOMIC[31]\"}, \"&denom\"] }\n },\n \"b31_eaReadLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_RDREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_RDREQ_LEVEL[31]\", \"&TCC_EA_RDREQ[31]\"]}, null] } },\n \"b31_eaWriteLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_WRREQ[31]\", 0]}, {\"$divide\": [\"&TCC_EA_WRREQ_LEVEL[31]\", \"&TCC_EA_WRREQ[31]\"]}, null] } },\n \"b31_eaAtomicLat\": { \"$avg\": {\"$cond\": [{\"$ne\": [\"&TCC_EA_ATOMIC[31]\", 0]}, {\"$divide\": [\"&TCC_EA_ATOMIC_LEVEL[31]\", \"&TCC_EA_ATOMIC[31]\"]}, null]}},\n \"b31_ea_read_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_read_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_RDREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_io_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_IO_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_gmi_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_GMI_CREDIT_STALL[31]\"}, \"&denom\"] }},\n 
\"b31_ea_write_stall_dram_credit\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_EA_WRREQ_DRAM_CREDIT_STALL[31]\"}, \"&denom\"] }},\n \"b31_ea_write_stall_too_many\": { \"$avg\":{ \"$divide\":[{\"$toInt\": \"&TCC_TOO_MANY_EA_WRREQS_STALL[31]\"}, \"&denom\"] }}\n\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"Channel\": \"16\",\n \"Hit Rate\": \"&b16_hitRate\",\n \"Req\": \"&b16_req\",\n \"Read Req\": \"&b16_readReq\",\n \"Write Req\": \"&b16_writeReq\",\n \"AtomicReq\": \"&b16_atomicReq\",\n \"EA Read Req\": \"&b16_eaReadReq\",\n \"EA Write Req\": \"&b16_eaWriteReq\",\n \"EA AtomicReq\": \"&b16_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b16_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b16_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b16_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b16_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b16_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b16_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b16_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b16_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b16_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b16_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"17\",\n \"Hit Rate\": \"&b17_hitRate\",\n \"Req\": \"&b17_req\",\n \"Read Req\": \"&b17_readReq\",\n \"Write Req\": \"&b17_writeReq\",\n \"AtomicReq\": \"&b17_atomicReq\",\n \"EA Read Req\": \"&b17_eaReadReq\",\n \"EA Write Req\": \"&b17_eaWriteReq\",\n \"EA AtomicReq\": \"&b17_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b17_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b17_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b17_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b17_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b17_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b17_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b17_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": 
\"&b17_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b17_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b17_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"18\",\n \"Hit Rate\": \"&b18_hitRate\",\n \"Req\": \"&b18_req\",\n \"Read Req\": \"&b18_readReq\",\n \"Write Req\": \"&b18_writeReq\",\n \"AtomicReq\": \"&b18_atomicReq\",\n \"EA Read Req\": \"&b18_eaReadReq\",\n \"EA Write Req\": \"&b18_eaWriteReq\",\n \"EA AtomicReq\": \"&b18_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b18_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b18_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b18_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b18_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b18_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b18_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b18_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b18_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b18_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b18_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"19\",\n \"Hit Rate\": \"&b19_hitRate\",\n \"Req\": \"&b19_req\",\n \"Read Req\": \"&b19_readReq\",\n \"Write Req\": \"&b19_writeReq\",\n \"AtomicReq\": \"&b19_atomicReq\",\n \"EA Read Req\": \"&b19_eaReadReq\",\n \"EA Write Req\": \"&b19_eaWriteReq\",\n \"EA AtomicReq\": \"&b19_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b19_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b19_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b19_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b19_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b19_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b19_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b19_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b19_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b19_ea_write_stall_dram_credit\",\n \"EA Write Stall 
- Starve\": \"&b19_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"20\",\n \"Hit Rate\": \"&b20_hitRate\",\n \"Req\": \"&b20_req\",\n \"Read Req\": \"&b20_readReq\",\n \"Write Req\": \"&b20_writeReq\",\n \"AtomicReq\": \"&b20_atomicReq\",\n \"EA Read Req\": \"&b20_eaReadReq\",\n \"EA Write Req\": \"&b20_eaWriteReq\",\n \"EA AtomicReq\": \"&b20_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b20_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b20_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b20_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b20_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b20_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b20_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b20_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b20_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b20_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b20_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"21\",\n \"Hit Rate\": \"&b21_hitRate\",\n \"Req\": \"&b21_req\",\n \"Read Req\": \"&b21_readReq\",\n \"Write Req\": \"&b21_writeReq\",\n \"AtomicReq\": \"&b21_atomicReq\",\n \"EA Read Req\": \"&b21_eaReadReq\",\n \"EA Write Req\": \"&b21_eaWriteReq\",\n \"EA AtomicReq\": \"&b21_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b21_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b21_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b21_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b21_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b21_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b21_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b21_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b21_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b21_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b21_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"22\",\n \"Hit Rate\": 
\"&b22_hitRate\",\n \"Req\": \"&b22_req\",\n \"Read Req\": \"&b22_readReq\",\n \"Write Req\": \"&b22_writeReq\",\n \"AtomicReq\": \"&b22_atomicReq\",\n \"EA Read Req\": \"&b22_eaReadReq\",\n \"EA Write Req\": \"&b22_eaWriteReq\",\n \"EA AtomicReq\": \"&b22_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b22_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b22_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b22_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b22_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b22_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b22_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b22_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b22_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b22_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b22_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"23\",\n \"Hit Rate\": \"&b23_hitRate\",\n \"Req\": \"&b23_req\",\n \"Read Req\": \"&b23_readReq\",\n \"Write Req\": \"&b23_writeReq\",\n \"AtomicReq\": \"&b23_atomicReq\",\n \"EA Read Req\": \"&b23_eaReadReq\",\n \"EA Write Req\": \"&b23_eaWriteReq\",\n \"EA AtomicReq\": \"&b23_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b23_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b23_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b23_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b23_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b23_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b23_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b23_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b23_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b23_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b23_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"24\",\n \"Hit Rate\": \"&b24_hitRate\",\n \"Req\": \"&b24_req\",\n \"Read Req\": \"&b24_readReq\",\n \"Write Req\": \"&b24_writeReq\",\n 
\"AtomicReq\": \"&b24_atomicReq\",\n \"EA Read Req\": \"&b24_eaReadReq\",\n \"EA Write Req\": \"&b24_eaWriteReq\",\n \"EA AtomicReq\": \"&b24_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b24_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b24_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b24_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b24_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b24_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b24_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b24_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b24_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b24_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b24_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"25\",\n \"Hit Rate\": \"&b25_hitRate\",\n \"Req\": \"&b25_req\",\n \"Read Req\": \"&b25_readReq\",\n \"Write Req\": \"&b25_writeReq\",\n \"AtomicReq\": \"&b25_atomicReq\",\n \"EA Read Req\": \"&b25_eaReadReq\",\n \"EA Write Req\": \"&b25_eaWriteReq\",\n \"EA AtomicReq\": \"&b25_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b25_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b25_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b25_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b25_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b25_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b25_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b25_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b25_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b25_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b25_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"26\",\n \"Hit Rate\": \"&b26_hitRate\",\n \"Req\": \"&b26_req\",\n \"Read Req\": \"&b26_readReq\",\n \"Write Req\": \"&b26_writeReq\",\n \"AtomicReq\": \"&b26_atomicReq\",\n \"EA Read Req\": \"&b26_eaReadReq\",\n \"EA Write Req\": \"&b26_eaWriteReq\",\n \"EA 
AtomicReq\": \"&b26_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b26_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b26_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b26_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b26_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b26_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b26_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b26_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b26_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b26_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b26_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n\n },\n {\n \"Channel\": \"27\",\n \"Hit Rate\": \"&b27_hitRate\",\n \"Req\": \"&b27_req\",\n \"Read Req\": \"&b27_readReq\",\n \"Write Req\": \"&b27_writeReq\",\n \"AtomicReq\": \"&b27_atomicReq\",\n \"EA Read Req\": \"&b27_eaReadReq\",\n \"EA Write Req\": \"&b27_eaWriteReq\",\n \"EA AtomicReq\": \"&b27_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b27_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b27_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b27_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b27_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b27_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b27_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b27_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b27_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b27_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b27_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"28\",\n \"Hit Rate\": \"&b28_hitRate\",\n \"Req\": \"&b28_req\",\n \"Read Req\": \"&b28_readReq\",\n \"Write Req\": \"&b28_writeReq\",\n \"AtomicReq\": \"&b28_atomicReq\",\n \"EA Read Req\": \"&b28_eaReadReq\",\n \"EA Write Req\": \"&b28_eaWriteReq\",\n \"EA AtomicReq\": \"&b28_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b28_eaReadLat\",\n \"EA Write Lat - cycles\": 
\"&b28_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b28_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b28_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b28_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b28_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b28_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b28_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b28_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b28_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"29\",\n \"Hit Rate\": \"&b29_hitRate\",\n \"Req\": \"&b29_req\",\n \"Read Req\": \"&b29_readReq\",\n \"Write Req\": \"&b29_writeReq\",\n \"AtomicReq\": \"&b29_atomicReq\",\n \"EA Read Req\": \"&b29_eaReadReq\",\n \"EA Write Req\": \"&b29_eaWriteReq\",\n \"EA AtomicReq\": \"&b29_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b29_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b29_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b29_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b29_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b29_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b29_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b29_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b29_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b29_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b29_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"30\",\n \"Hit Rate\": \"&b30_hitRate\",\n \"Req\": \"&b30_req\",\n \"Read Req\": \"&b30_readReq\",\n \"Write Req\": \"&b30_writeReq\",\n \"AtomicReq\": \"&b30_atomicReq\",\n \"EA Read Req\": \"&b30_eaReadReq\",\n \"EA Write Req\": \"&b30_eaWriteReq\",\n \"EA AtomicReq\": \"&b30_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b30_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b30_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b30_eaAtomicLat\",\n \"EA Read Stall - IO\": 
\"&b30_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b30_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b30_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b30_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b30_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b30_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b30_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n },\n {\n \"Channel\": \"31\",\n \"Hit Rate\": \"&b31_hitRate\",\n \"Req\": \"&b31_req\",\n \"Read Req\": \"&b31_readReq\",\n \"Write Req\": \"&b31_writeReq\",\n \"AtomicReq\": \"&b31_atomicReq\",\n \"EA Read Req\": \"&b31_eaReadReq\",\n \"EA Write Req\": \"&b31_eaWriteReq\",\n \"EA AtomicReq\": \"&b31_eaAtomicReq\",\n \"EA Read Lat - cycles\": \"&b31_eaReadLat\",\n \"EA Write Lat - cycles\": \"&b31_eaWriteLat\",\n \"EA Atomic Lat - cycles\": \"&b31_eaAtomicLat\",\n \"EA Read Stall - IO\": \"&b31_ea_read_stall_io_credit\",\n \"EA Read Stall - GMI\": \"&b31_ea_read_stall_gmi_credit\",\n \"EA Read Stall - DRAM\": \"&b31_ea_read_stall_dram_credit\",\n \"EA Write Stall - IO\": \"&b31_ea_write_stall_io_credit\",\n \"EA Write Stall - GMI\": \"&b31_ea_write_stall_gmi_credit\",\n \"EA Write Stall - DRAM\": \"&b31_ea_write_stall_dram_credit\",\n \"EA Write Stall - Starve\": \"&b31_ea_write_stall_too_many\",\n \"Units\": \"&denom\"\n\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "type": "table" + } + ], + "title": "L2 - EA Read Latency (Channel 16-31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + 
"GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + 
"Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 420 + }, + "id": 93, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": 
"rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 10, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 420 + }, + "id": 94, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + 
"HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, 
+ "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 428 + }, + "id": 187, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 0 - 15) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + 
"ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 11, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Atomic Lat - cycles", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 428 + }, + "id": 201, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Atomic Latency (Channel 16 - 31) (Cycles)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + 
"HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 436 + }, + "id": 220, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + 
"title": "L2 - EA Read Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + 
"ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 12, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 436 + }, + "id": 227, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + 
"LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 444 + }, + "id": 221, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + 
"BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", 
+ "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 13, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 444 + }, + "id": 228, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": 
"#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 452 + }, + "id": 222, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + 
"EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + 
"ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 14, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Read Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 452 + }, + "id": 229, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Read Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": 
"rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 460 + }, + "id": 223, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": 
"200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 15, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": 
"teal", + "Name": "EA Write Stall - IO", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 460 + }, + "id": 230, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - I/O (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": 
"#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 468 + }, + "id": 225, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + 
"HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 16, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - GMI", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 12, + "y": 468 + }, + "id": 231, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - GMI (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + 
"ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 476 + }, + "id": 224, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Stall - HBM (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, 
+ "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 17, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - DRAM", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 476 + }, + "id": 232, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA 
Write Stall - HBM (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + "LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, 
+ "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 484 + }, + "id": 226, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 68, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 0 - 15) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + }, + { + "Aggregate": "last", + "BarPadding": 10, + "BaseLineColor": "#ff0000", + "BaseLineWidth": 1, + "CurveType": "Monotone", + "DateFormat": "YYYY-MM-DD HH:mm:ss", + "DateTimeColName": "date", + "DotColor": "white", + "DotSize": 10, + "EvenRowColor": "rgba(61, 61, 64, 0.78)", + "FlashHighLimitBar": false, + "FlashLowLimitBar": false, + "GroupCols": 0, + "GroupGap": 5, + "GroupLabelColor": "#ffffff", + "GroupLabelFontSize": "200%", + "GroupNameFilter": "", + "GroupRenamingRules": [], + "GroupSortString": "", + "HighAxisColor": "#ffffff", + "HighAxisWidth": 1, + "HighBarColor": "rgb(120, 128, 0)", + "HighLimitBarColor": "#ff0000", + "HighLimitBarFlashColor": "#ffa500", + "HighLimitBarFlashTimeout": 1000, + "HighLimitLineColor": "#ff0000", + "HighLmitLineWidth": 1, + "HighSideMargin": 22, + "Horizontal": false, + "LabelColName": "Channel", + "LabelColor": "#ffffff", + "LabelFontSize": "70%", + "LabelNameFilter": "", + "LabelRenamingRules": [], + "LableAngle": 0, + "Legend": false, + 
"LineColor": "blue", + "LineWidth": 5, + "Links": [], + "LowAxisColor": "#ffffff", + "LowAxisWidth": 1, + "LowBarColor": "teal", + "LowLimitBarColor": "#ff0000", + "LowLimitBarFlashColor": "#ffa500", + "LowLimitBarFlashTimeout": 200, + "LowLimitLineColor": "#ff0000", + "LowLmitLineWidth": 1, + "LowSideMargin": 50, + "MaxLineColor": "rgb(74, 232, 12)", + "MaxLineWidth": 1, + "MinLineColor": "#ff0000", + "MinLineWidth": 1, + "MultiBarPadding": 10, + "OddRowColor": "rgba(33, 33, 34, 0.92)", + "OutOfRangeLabelColor": "#ffffff", + "OutlineColor": "rgba(245, 255, 0, 0.1)", + "RecolorHighLimitBar": false, + "RecolorLowLimitBar": false, + "RecolorRules": [], + "ScaleFactor": 1, + "ShowBars": true, + "ShowBaseLine": false, + "ShowDate": false, + "ShowGroupLabels": true, + "ShowHighLimitLine": false, + "ShowLabels": true, + "ShowLeftAxis": true, + "ShowLines": false, + "ShowLowLimitLine": false, + "ShowMaxLine": false, + "ShowMinLine": false, + "ShowRightAxis": true, + "ShowValues": true, + "SortColName": "value", + "SortDirection": "ascending", + "TZOffsetHours": 0, + "ToolTipFontSize": "100%", + "ToolTipType": "", + "TooltipDateFormat": "YYYY-MM-DD HH:mm:ss", + "VGroupGap": 5, + "ValueColName": "", + "ValueColor": "#ffffff", + "ValueDecimals": 0, + "ValueFontSize": "70%", + "ValuePosition": "top", + "Values": [ + { + "$$hashKey": "object:277", + "Col": 18, + "HighBarColor": "rgb(120, 128, 0)", + "LowBarColor": "teal", + "Name": "EA Write Stall - Starve", + "Selected": true + } + ], + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 484 + }, + "id": 233, + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 70, + "refId": "A" + } + ], + "title": "L2 - EA Write Starve (Channel 16 - 31) (Cycles $normUnit)", + "type": "michaeldmoore-multistat-panel" + } + ], + "refresh": "", + "schemaVersion": 34, + "style": "dark", + "tags": [], + "templating": { + 
"list": [ + { + "current": { + "selected": false, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + "hide": 0, + "includeAll": false, + "label": "Normalization", + "multi": false, + "name": "normUnit", + "options": [ + { + "selected": true, + "text": "\"per Wave\"", + "value": "\"per Wave\"" + }, + { + "selected": false, + "text": "\"per Cycle\"", + "value": "\"per Cycle\"" + }, + { + "selected": false, + "text": "\"per Sec\"", + "value": "\"per Sec\"" + }, + { + "selected": false, + "text": "\"per Kernel\"", + "value": "\"per Kernel\"" + } + ], + "query": "\"per Wave\",\n\"per Cycle\",\n\"per Sec\",\n\"per Kernel\"", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&lds_banks_per_cu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "L2 Channels", + "multi": false, + "name": "lds_banks_per_cu", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&lds_banks_per_cu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&se_per_gpu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SEs", + "multi": false, + "name": "numSE", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&se_per_gpu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "104", + "value": "104" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&cu_per_gpu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#CUs", + "multi": false, + "name": "cu_per_gpu", + "options": [], + "query": 
"$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&cu_per_gpu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "32", + "value": "32" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_waves_per_cu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Max Waves/CU", + "multi": false, + "name": "max_waves_per_cu", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_waves_per_cu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1700", + "value": "1700" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SCLK (MHz)", + "multi": false, + "name": "sclk", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "56", + "value": "56" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sqc_per_gpu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "#SQC", + "multi": false, + "name": "sqc_per_gpu", + "options": [], + "query": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sqc_per_gpu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1638.4", + "value": "1638.4" + }, + "definition": "$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbm_bw\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "HBM BW (GB/s)", + "multi": false, + "name": "hbmBW", + "options": [], + "query": 
"$Workload1.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbm_bw\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "rocprofiler-compute_pymongo4_Vcopy_gfx90a", + "value": "rocprofiler-compute_pymongo4_Vcopy_gfx90a" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Workload", + "multi": false, + "name": "Workload1", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "29467", + "value": "29467" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Kernel Cycles", + "multi": false, + "name": "kernelBusyCycles", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"myAvg\": {\n \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [ \"&End_Timestamp\", \"&Start_Timestamp\"] }, 1000] }, $sclk] }\n }\n }},\n {\"$set\": {\n 
\"array\": [\n {\n \"_id\": { \"$round\": [\"&myAvg\", 0] }\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "71", + "value": "71" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu] }, 8] }] }, $cu_per_gpu] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Active CUs", + "multi": false, + "name": "numActiveCUs", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu] },8] }, { \"$min\": 
[{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu] }, 8] }] }, $cu_per_gpu] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Dispatch_ID\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Filtered Dispatch ID", + "multi": false, + "name": "DispatchIDFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"GPU_ID\": { \"$in\": [${gpuFilter:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Dispatch_ID\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. 
(1|18)", + "hide": 0, + "label": "Dispatch Filter", + "name": "DispatchID", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "2", + "value": "2" + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&GPU_ID\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "GCD", + "multi": false, + "name": "gpuFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&GPU_ID\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&Kernel_Name\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Kernels", + "multi": true, + "name": "KernelNameFilter", + "options": [], + "query": "$Workload1.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&Kernel_Name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "rocprofiler-compute_pymongo4_Vcopy_gfx908", + "value": "rocprofiler-compute_pymongo4_Vcopy_gfx908" + }, + "definition": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline Workload", + "multi": false, + "name": "Workload2", + "options": [], + "query": "workload_names.names.aggregate([\n {\"$group\": {\n \"_id\": \"&name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "69", + "value": "69" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { 
\"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu2] }, 8] }] }, $cu_per_gpu2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline ActiveCUs", + "multi": false, + "name": "numActiveCUs2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n \"Dispatch_ID\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n\n {\"$group\": {\n \"_id\": null,\n \"theAvg\": {\n \"$avg\": {\n \"$cond\":[\n {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n \"\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $max_waves_per_cu2] }, 8] }] }, $cu_per_gpu2] }}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"GPU_ID\": 
{ \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Dispatch_ID\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "description": "Defaults to all DispatchIDs", + "hide": 2, + "includeAll": true, + "label": "Baseline Dispatch IDs", + "multi": false, + "name": "DispatchIDFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"GPU_ID\": { \"$in\": [${gpuFilter2:raw}] },\n \"Kernel_Name\": { \"$in\": ${KernelNameFilter2:json}}\n }},\n {\"$group\": {\n \"_id\": \"&Dispatch_ID\"\n }},\n {\"$sort\": {\n \"_id\": 1\n }}\n]);", + "refresh": 1, + "regex": "${DispatchID2:text}", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "description": "Desired DispatchID filters as regex ex. (1|18)", + "hide": 0, + "label": "Baseline Dispatch Filter", + "name": "DispatchID2", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "2", + "value": "2" + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&GPU_ID\"\n }}\n]);", + "hide": 0, + "includeAll": false, + "label": "Baseline GCD", + "multi": false, + "name": "gpuFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&GPU_ID\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": \"&Kernel_Name\"\n }}\n]);", + "hide": 0, + "includeAll": true, + "label": "Baseline Kernels", + "multi": true, + "name": "KernelNameFilter2", + "options": [], + "query": "$Workload2.pmc_perf.aggregate([\n {\"$group\": {\n \"_id\": 
\"&Kernel_Name\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "System Info" + ], + "value": [ + "System Info" + ] + }, + "hide": 0, + "includeAll": false, + "label": "Comparison Panels", + "multi": true, + "name": "select", + "options": [ + { + "selected": true, + "text": "System Info", + "value": "System Info" + }, + { + "selected": false, + "text": "System Speed-of-Light", + "value": "System Speed-of-Light" + }, + { + "selected": false, + "text": "Roofline", + "value": "Roofline" + }, + { + "selected": false, + "text": "Command Processor", + "value": "Command Processor" + }, + { + "selected": false, + "text": "Shader Processor Input", + "value": "Shader Processor Input" + }, + { + "selected": false, + "text": "Wavefront", + "value": "Wavefront" + }, + { + "selected": false, + "text": "Compute Pipeline", + "value": "Compute Pipeline" + }, + { + "selected": false, + "text": "Instruction Mix", + "value": "Instruction Mix" + }, + { + "selected": false, + "text": "Local Data Share", + "value": "Local Data Share" + }, + { + "selected": false, + "text": "Instruction Cache", + "value": "Instruction Cache" + }, + { + "selected": false, + "text": "Scalar L1D Cache", + "value": "Scalar L1D Cache" + }, + { + "selected": false, + "text": "Texture Addr and Data", + "value": "Texture Addr and Data" + }, + { + "selected": false, + "text": "Vector L1D Cache", + "value": "Vector L1D Cache" + }, + { + "selected": false, + "text": "L2 Cache", + "value": "L2 Cache" + } + ], + "query": "System Info, \nSystem Speed-of-Light, \nRoofline,\nCommand Processor, \nShader Processor Input, \nWavefront,\nCompute Pipeline, \nInstruction Mix,\nLocal Data Share, \nInstruction Cache, \nScalar L1D Cache, \nTexture Addr and Data, \nVector L1D Cache,\nL2 Cache", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "32", + 
"value": "32" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&lds_banks_per_cu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline L2 Channels", + "multi": false, + "name": "lds_banks_per_cu2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&lds_banks_per_cu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "8", + "value": "8" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&se_per_gpu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SEs", + "multi": false, + "name": "numSE2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&se_per_gpu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "120", + "value": "120" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&cu_per_gpu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #CUs", + "multi": false, + "name": "cu_per_gpu2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&cu_per_gpu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "40", + "value": "40" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_waves_per_cu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline Max Waves/CU", + "multi": false, + "name": "max_waves_per_cu2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_waves_per_cu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + 
"selected": false, + "text": "1502", + "value": "1502" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_sclk\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline SCLK (MHz)", + "multi": false, + "name": "sclk2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&max_sclk\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "64", + "value": "64" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sqc_per_gpu\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline #SQC", + "multi": false, + "name": "sqc_per_gpu2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&sqc_per_gpu\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1228.8", + "value": "1228.8" + }, + "definition": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbm_bw\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "Baseline HBM BW (GB/s)", + "multi": false, + "name": "hbmBW2", + "options": [], + "query": "$Workload2.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&hbm_bw\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "MI200", + "value": "MI200" + }, + "definition": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu_model\"\n }}\n]);", + "hide": 2, + "includeAll": false, + "label": "SOC", + "multi": false, + "name": "soc", + "options": [], + "query": "${Workload1}.sysinfo.aggregate([\n {\"$group\": {\n \"_id\": \"&gpu_model\"\n }}\n]);", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + 
"text": "5", + "value": "5" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "TopN", + "options": [ + { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "20", + "value": "20" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "query": "1,5,10,15,20,50,100", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2021-11-04T14:21:39.749Z", + "to": "2021-11-08T14:21:39.749Z" + }, + "timepicker": {}, + "timezone": "", + "title": "rocprofiler-compute_v2.0_pubx", + "uid": "rocprofiler-compute_v20x", + "version": 2, + "weekStart": "" +} diff --git a/projects/rocprofiler-compute/grafana/docker-compose.yml b/projects/rocprofiler-compute/grafana/docker-compose.yml new file mode 100644 index 0000000000..68003200bb --- /dev/null +++ b/projects/rocprofiler-compute/grafana/docker-compose.yml @@ -0,0 +1,43 @@ +# ----------------------------------------------------------------------- +# NOTE: +# Dependencies are not included as part of ROCm Compute Profiler. +# It's the user's responsibility to accept any licensing implications +# before building the project +# ----------------------------------------------------------------------- + +version: "3.3" + +services: + web: + image: rocprofiler-compute-grafana-v1.0 + container_name: rocprofiler-compute-grafana-v1.0 + restart: always + build: . 
+ environment: + - GF_PATHS_CONFIG="grafana/etc/grafana.ini" + - GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=amd-rocprofiler-compute-data-plugin + - GF_DEFAULT_APP_MODE=development + ports: + - "14000:4000" + volumes: + - grafana-storage:/var/lib/grafana + stdin_open: true + tty: true + db_mongo: + container_name: mongo + image: mongo + restart: always + environment: + MONGO_INITDB_ROOT_USERNAME: temp + MONGO_INITDB_ROOT_PASSWORD: temp123 + volumes: + - grafana-mongo-db:/data/db + ports: + - "27018:27017" + command: mongod --bind_ip 0.0.0.0 + +volumes: + grafana-mongo-db: + external: true + grafana-storage: + external: true diff --git a/projects/rocprofiler-compute/grafana/docker-entrypoint.sh b/projects/rocprofiler-compute/grafana/docker-entrypoint.sh new file mode 100755 index 0000000000..3775f06c28 --- /dev/null +++ b/projects/rocprofiler-compute/grafana/docker-entrypoint.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +##############################################################################bl +# MIT License +# +# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +##############################################################################el + +pushd /var/lib/grafana/plugins/rocprofiler-compute_plugin +npm run server & +popd + +service grafana-server start + +if [ -z "$1" ]; then + exec bash +else + eval $@ +fi diff --git a/projects/rocprofiler-compute/grafana/grafana.ini b/projects/rocprofiler-compute/grafana/grafana.ini new file mode 100644 index 0000000000..d2e07ed15f --- /dev/null +++ b/projects/rocprofiler-compute/grafana/grafana.ini @@ -0,0 +1,1007 @@ +##################### Grafana Configuration Example ##################### +# +# Everything has defaults so you only need to uncomment things you want to +# change + +# possible values : production, development +app_mode = development + +# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty +;instance_name = ${HOSTNAME} + +#################################### Paths #################################### +[paths] +# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) +;data = /var/lib/grafana + +# Temporary files in `data` directory older than given duration will be removed +;temp_data_lifetime = 24h + +# Directory where grafana can store logs +logs = /var/log/grafana + +# Directory where grafana will automatically scan and look for plugins +plugins = /var/lib/grafana/plugins + +# folder that contains provisioning config files that grafana will apply on startup and while running. 
+;provisioning = conf/provisioning + +#################################### Server #################################### +[server] +# Protocol (http, https, h2, socket) +;protocol = http + +# The ip address to bind to, empty will bind to all interfaces +;http_addr = + +# The http port to use +;http_port = 3000 + +# The public facing domain name used to access grafana from a browser +;domain = localhost + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +;enforce_domain = false + +# The full public facing url you use in browser, used for redirects and emails +# If you use reverse proxy and sub path specify full url (with sub path) +;root_url = %(protocol)s://%(domain)s:%(http_port)s/ + +# Serve Grafana from subpath specified in `root_url` setting. By default it is set to `false` for compatibility reasons. +;serve_from_sub_path = false + +# Log web requests +;router_logging = false + +# the path relative to the working path +;static_root_path = public + +# enable gzip +;enable_gzip = false + +# https certs & key file +;cert_file = +;cert_key = + +# Unix socket path +;socket = + +# CDN Url +;cdn_url = + +# Sets the maximum time using a duration format (5s/5m/5ms) before timing out read of an incoming request and closing idle connections. +# `0` means there is no timeout for reading the request. +;read_timeout = 0 + +#################################### Database #################################### +[database] +# You can configure the database connection by specifying type, host, name, user and password +# as separate properties or as one string using the url property. + +# Either "mysql", "postgres" or "sqlite3", it's your choice +;type = sqlite3 +;host = 127.0.0.1:3306 +;name = grafana +;user = root +# If the password contains # or ; you have to wrap it with triple quotes. 
Ex """#password;""" +;password = + +# Use either URL or the previous fields to configure the database +# Example: mysql://user:secret@host:port/database +;url = + +# For "postgres" only, either "disable", "require" or "verify-full" +;ssl_mode = disable + +# Database drivers may support different transaction isolation levels. +# Currently, only "mysql" driver supports isolation levels. +# If the value is empty - driver's default isolation level is applied. +# For "mysql" use "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ" or "SERIALIZABLE". +;isolation_level = + +;ca_cert_path = +;client_key_path = +;client_cert_path = +;server_cert_name = + +# For "sqlite3" only, path relative to data_path setting +;path = grafana.db + +# Max idle conn setting default is 2 +;max_idle_conn = 2 + +# Max conn setting default is 0 (mean not set) +;max_open_conn = + +# Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) +;conn_max_lifetime = 14400 + +# Set to true to log the sql calls and execution times. +;log_queries = + +# For "sqlite3" only. cache mode setting used for connecting to the database. (private, shared) +;cache_mode = private + +################################### Data sources ######################### +[datasources] +# Upper limit of data sources that Grafana will return. This limit is a temporary configuration and it will be deprecated when pagination will be introduced on the list data sources API. +;datasource_limit = 5000 + +#################################### Cache server ############################# +[remote_cache] +# Either "redis", "memcached" or "database" default is "database" +;type = database + +# cache connectionstring options +# database: will use Grafana primary database. +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=0,ssl=false`. Only addr is required. ssl may be 'true', 'false', or 'insecure'. 
+# memcache: 127.0.0.1:11211 +;connstr = + +#################################### Data proxy ########################### +[dataproxy] + +# This enables data proxy logging, default is false +logging = true + +# How long the data proxy waits to read the headers of the response before timing out, default is 30 seconds. +# This setting also applies to core backend HTTP data sources where query requests use an HTTP client with timeout set. +;timeout = 30 + +# How long the data proxy waits to establish a TCP connection before timing out, default is 10 seconds. +;dialTimeout = 10 + +# How many seconds the data proxy waits before sending a keepalive probe request. +;keep_alive_seconds = 30 + +# How many seconds the data proxy waits for a successful TLS Handshake before timing out. +;tls_handshake_timeout_seconds = 10 + +# How many seconds the data proxy will wait for a server's first response headers after +# fully writing the request headers if the request has an "Expect: 100-continue" +# header. A value of 0 will result in the body being sent immediately, without +# waiting for the server to approve. +;expect_continue_timeout_seconds = 1 + +# Optionally limits the total number of connections per host, including connections in the dialing, +# active, and idle states. On limit violation, dials will block. +# A value of zero (0) means no limit. +;max_conns_per_host = 0 + +# The maximum number of idle connections that Grafana will keep alive. +;max_idle_connections = 100 + +# How many seconds the data proxy keeps an idle connection open before timing out. +;idle_conn_timeout_seconds = 90 + +# If enabled and user is not anonymous, data proxy will add X-Grafana-User header with username into the request, default is false. +;send_user_header = false + +#################################### Analytics #################################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. 
+# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. +# Change this option to false to disable reporting. +;reporting_enabled = true + +# The name of the distributor of the Grafana instance. Ex hosted-grafana, grafana-labs +;reporting_distributor = grafana-labs + +# Set to false to disable all checks to https://grafana.net +# for new versions (grafana itself and plugins), check is used +# in some UI views to notify that grafana or plugin update exists +# This option does not cause any auto updates, nor send any information +# only a GET request to http://grafana.com to get latest versions +;check_for_updates = true + +# Google Analytics universal tracking code, only enabled if you specify an id here +;google_analytics_ua_id = + +# Google Tag Manager ID, only enabled if you specify an id here +;google_tag_manager_id = + +#################################### Security #################################### +[security] +# disable creation of admin user on first start of grafana +;disable_initial_admin_creation = false + +# default admin user, created on startup +;admin_user = admin + +# default admin password, can be changed before first start of grafana, or in profile settings +;admin_password = admin + +# used for signing +;secret_key = SW2YcwTIb9zpOOhoPsMm + +# disable gravatar profile images +;disable_gravatar = false + +# data source proxy whitelist (ip_or_domain:port separated by spaces) +;data_source_proxy_whitelist = + +# disable protection against brute force login attempts +;disable_brute_force_login_protection = false + +# set to true if you host Grafana behind HTTPS. default is false. +;cookie_secure = false + +# set cookie SameSite attribute. defaults to `lax`. can be set to "lax", "strict", "none" and "disabled" +;cookie_samesite = lax + +# set to true if you want to allow browsers to render Grafana in a ,

+ +Instrumenting a binary +======================================== + +.. raw:: html + +

+ +Writing a ROCm Systems Profiler configuration file +================================================== + +.. raw:: html + +

+ +Visualization and features of Perfetto traces +============================================= + +.. raw:: html + +

diff --git a/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst b/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst new file mode 100644 index 0000000000..51100b81b2 --- /dev/null +++ b/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst @@ -0,0 +1,33 @@ +.. meta:: + :description: ROCm Systems Profiler introduction, explanation, and reference + :keywords: rocprof-sys, rocprofiler-systems, Omnitrace, ROCm, profiler, explanation, introduction, what is, tracking, visualization, tool, Instinct, accelerator, AMD + +****************************** +What is ROCm Systems Profiler? +****************************** + +ROCm Systems Profiler is designed for the high-level profiling and comprehensive tracing +of applications running on the CPU or the CPU and GPU. It supports dynamic binary +instrumentation, call-stack sampling, and various other features for determining +which function and line number are currently executing. + +A visualization of the comprehensive ROCm Systems Profiler results can be observed in any modern +web browser. Upload the Perfetto (``.proto``) output files produced by ROCm Systems Profiler at +`ui.perfetto.dev <https://ui.perfetto.dev>`_ to see the details. + +.. important:: + If you are using a version of ROCm prior to ROCm 6.3.1 and are experiencing problems viewing your + trace in the latest version of `Perfetto <http://ui.perfetto.dev>`_, then try using + `Perfetto UI v46.0 <https://ui.perfetto.dev/v46.0-35b3d9845/#!/>`_. + +Aggregated high-level results are available as human-readable text files and +JSON files for programmatic analysis. The JSON output files are compatible with the +`hatchet <https://github.com/LLNL/hatchet>`_ Python package. Hatchet converts +the performance data into pandas data frames and facilitates multi-run comparisons, filtering, +and visualization in Jupyter notebooks. + +To use ROCm Systems Profiler for instrumentation, follow these two configuration steps: + +#. 
Indicate the functions and modules to :doc:`instrument <./how-to/instrumenting-rewriting-binary-application>` in the target binaries, including the executable and any libraries +#. Specify the :doc:`instrumentation parameters <./how-to/configuring-runtime-options>` to use when the instrumented binaries are launched + diff --git a/projects/rocprofiler-systems/examples/.gitignore b/projects/rocprofiler-systems/examples/.gitignore new file mode 100644 index 0000000000..ae0c959c3b --- /dev/null +++ b/projects/rocprofiler-systems/examples/.gitignore @@ -0,0 +1,2 @@ +/build* +/install* diff --git a/projects/rocprofiler-systems/examples/CMakeLists.txt b/projects/rocprofiler-systems/examples/CMakeLists.txt new file mode 100644 index 0000000000..9c29d29dd3 --- /dev/null +++ b/projects/rocprofiler-systems/examples/CMakeLists.txt @@ -0,0 +1,80 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-examples LANGUAGES C CXX) + +if("${CMAKE_BUILD_TYPE}" STREQUAL "") + set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type" FORCE) +endif() + +string(TOUPPER "${CMAKE_BUILD_TYPE}" BUILD_TYPE) + +set(CMAKE_VISIBILITY_INLINES_HIDDEN OFF) +set(CMAKE_CXX_VISIBILITY_PRESET "default") +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_CLANG_TIDY) +set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME examples) + +if(ROCPROFSYS_BUILD_DEBUG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -fno-omit-frame-pointer") +endif() + +option(BUILD_SHARED_LIBS "Build dynamic libraries" ON) + +if(CMAKE_PROJECT_NAME STREQUAL "rocprofiler-systems") + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) + rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_EXAMPLES + "Install rocprofiler-systems examples" OFF + ) +else() + option(ROCPROFSYS_INSTALL_EXAMPLES "Install rocprofiler-systems examples" ON) +endif() + +if(ROCPROFSYS_INSTALL_EXAMPLES) + include(GNUInstallDirs) +endif() + +set(ROCPROFSYS_EXAMPLE_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "") +# defines function for creating causal profiling exes +include(${CMAKE_CURRENT_LIST_DIR}/causal-helpers.cmake) + +add_subdirectory(transpose) +add_subdirectory(parallel-overhead) +add_subdirectory(code-coverage) +add_subdirectory(user-api) +add_subdirectory(openmp) +add_subdirectory(mpi) +add_subdirectory(python) +add_subdirectory(lulesh) +add_subdirectory(rccl) +add_subdirectory(rewrite-caller) 
+add_subdirectory(causal) +add_subdirectory(trace-time-window) +add_subdirectory(fork) +add_subdirectory(videodecode) +add_subdirectory(jpegdecode) +add_subdirectory(roctx) diff --git a/projects/rocprofiler-systems/examples/causal-helpers.cmake b/projects/rocprofiler-systems/examples/causal-helpers.cmake new file mode 100644 index 0000000000..7a472f4058 --- /dev/null +++ b/projects/rocprofiler-systems/examples/causal-helpers.cmake @@ -0,0 +1,152 @@ +# +# function for +# +include_guard(DIRECTORY) + +if(NOT TARGET rocprofiler-systems::rocprofiler-systems-user-library) + find_package(rocprofiler-systems REQUIRED COMPONENTS user) +endif() + +if(NOT coz-profiler_FOUND) + find_package(coz-profiler QUIET) +endif() + +if(NOT TARGET rocprofsys-causal-examples) + add_custom_target(rocprofsys-causal-examples) +endif() + +function(rocprofiler_systems_causal_example_executable _NAME) + cmake_parse_arguments( + CAUSAL + "" + "" + "SOURCES;DEFINITIONS;INCLUDE_DIRECTORIES;LINK_LIBRARIES" + ${ARGN} + ) + + function(rocprofiler_systems_causal_example_interface _TARGET) + if(NOT TARGET ${_TARGET}) + find_package(Threads REQUIRED) + add_library(${_TARGET} INTERFACE) + target_link_libraries(${_TARGET} INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + endif() + endfunction() + + rocprofiler_systems_causal_example_interface(rocprofsys-causal-example-lib-debug) + rocprofiler_systems_causal_example_interface(rocprofsys-causal-example-lib-no-debug) + + target_compile_options( + rocprofsys-causal-example-lib-debug + INTERFACE -g3 -fno-omit-frame-pointer + ) + target_compile_options(rocprofsys-causal-example-lib-no-debug INTERFACE -g0) + + add_executable(${_NAME} ${CAUSAL_SOURCES}) + target_compile_definitions( + ${_NAME} + PRIVATE USE_COZ=0 USE_OMNI=0 ${CAUSAL_DEFINITIONS} + ) + target_include_directories( + ${_NAME} + PRIVATE ${ROCPROFSYS_EXAMPLE_ROOT_DIR}/causal ${CAUSAL_INCLUDE_DIRECTORIES} + ) + target_link_libraries( + ${_NAME} + PRIVATE + ${CAUSAL_LINK_LIBRARIES} + 
rocprofiler-systems::rocprofiler-systems-user-library + rocprofsys-causal-example-lib-debug + ) + + add_executable(${_NAME}-rocprofsys ${CAUSAL_SOURCES}) + target_compile_definitions( + ${_NAME}-rocprofsys + PRIVATE USE_COZ=0 USE_OMNI=1 ${CAUSAL_DEFINITIONS} + ) + target_include_directories( + ${_NAME}-rocprofsys + PRIVATE ${ROCPROFSYS_EXAMPLE_ROOT_DIR}/causal ${CAUSAL_INCLUDE_DIRECTORIES} + ) + target_link_libraries( + ${_NAME}-rocprofsys + PRIVATE + ${CAUSAL_LINK_LIBRARIES} + rocprofiler-systems::rocprofiler-systems-user-library + rocprofsys-causal-example-lib-debug + ) + + add_executable(${_NAME}-ndebug ${CAUSAL_SOURCES}) + target_compile_definitions( + ${_NAME}-ndebug + PRIVATE USE_COZ=0 USE_OMNI=0 ${CAUSAL_DEFINITIONS} + ) + target_include_directories( + ${_NAME}-ndebug + PRIVATE ${ROCPROFSYS_EXAMPLE_ROOT_DIR}/causal ${CAUSAL_INCLUDE_DIRECTORIES} + ) + target_link_libraries( + ${_NAME}-ndebug + PRIVATE + ${CAUSAL_LINK_LIBRARIES} + rocprofiler-systems::rocprofiler-systems-user-library + rocprofsys-causal-example-lib-no-debug + ) + + add_executable(${_NAME}-rocprofsys-ndebug ${CAUSAL_SOURCES}) + target_compile_definitions( + ${_NAME}-rocprofsys-ndebug + PRIVATE USE_COZ=0 USE_OMNI=1 ${CAUSAL_DEFINITIONS} + ) + target_include_directories( + ${_NAME}-rocprofsys-ndebug + PRIVATE ${ROCPROFSYS_EXAMPLE_ROOT_DIR}/causal ${CAUSAL_INCLUDE_DIRECTORIES} + ) + target_link_libraries( + ${_NAME}-rocprofsys-ndebug + PRIVATE + ${CAUSAL_LINK_LIBRARIES} + rocprofiler-systems::rocprofiler-systems-user-library + rocprofsys-causal-example-lib-no-debug + ) + + add_dependencies( + rocprofsys-causal-examples + ${_NAME} + ${_NAME}-rocprofsys + ${_NAME}-ndebug + ${_NAME}-rocprofsys-ndebug + ) + + if(coz-profiler_FOUND) + rocprofiler_systems_causal_example_interface(rocprofsys-causal-example-lib-coz) + target_compile_options( + rocprofsys-causal-example-lib-coz + INTERFACE -g3 -gdwarf-3 -fno-omit-frame-pointer + ) + + add_executable(${_NAME}-coz ${CAUSAL_SOURCES}) + 
target_compile_definitions( + ${_NAME}-coz + PRIVATE USE_COZ=1 USE_OMNI=0 ${CAUSAL_DEFINITIONS} + ) + target_include_directories( + ${_NAME}-coz + PRIVATE ${ROCPROFSYS_EXAMPLE_ROOT_DIR}/causal ${CAUSAL_INCLUDE_DIRECTORIES} + ) + target_link_libraries( + ${_NAME}-coz + PRIVATE ${CAUSAL_LINK_LIBRARIES} rocprofsys-causal-example-lib-coz coz::coz + ) + + add_dependencies(rocprofsys-causal-examples ${_NAME}-coz) + endif() + + if(ROCPROFSYS_INSTALL_EXAMPLES) + install( + TARGETS ${_NAME} ${_NAME}-rocprofsys ${_NAME}-coz + DESTINATION bin + COMPONENT rocprofiler-systems-examples + OPTIONAL + ) + endif() +endfunction() diff --git a/projects/rocprofiler-systems/examples/causal/CMakeLists.txt b/projects/rocprofiler-systems/examples/causal/CMakeLists.txt new file mode 100644 index 0000000000..31bf1eb567 --- /dev/null +++ b/projects/rocprofiler-systems/examples/causal/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-causal-example LANGUAGES CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +set(CMAKE_BUILD_TYPE "Release") +find_package(Threads REQUIRED) +if(NOT TARGET rocprofiler-systems::rocprofiler-systems-user-library) + find_package(rocprofiler-systems REQUIRED COMPONENTS user) +endif() + +add_library(causal-interface-library INTERFACE) +target_compile_options( + causal-interface-library + INTERFACE -g3 -gdwarf-3 -fno-omit-frame-pointer +) +target_link_libraries( + causal-interface-library + INTERFACE Threads::Threads ${CMAKE_DL_LIBS} +) + +rocprofiler_systems_causal_example_executable( + "causal-both" + SOURCES causal.cpp impl.cpp + LINK_LIBRARIES causal-interface-library + DEFINITIONS USE_RNG=1 USE_CPU=1 +) + +rocprofiler_systems_causal_example_executable( + "causal-rng" + SOURCES causal.cpp impl.cpp + 
LINK_LIBRARIES causal-interface-library + DEFINITIONS USE_RNG=1 USE_CPU=0 +) + +rocprofiler_systems_causal_example_executable( + "causal-cpu" + SOURCES causal.cpp impl.cpp + LINK_LIBRARIES causal-interface-library + DEFINITIONS USE_RNG=0 USE_CPU=1 +) diff --git a/projects/rocprofiler-systems/examples/causal/causal.cpp b/projects/rocprofiler-systems/examples/causal/causal.cpp new file mode 100644 index 0000000000..a8a98a996f --- /dev/null +++ b/projects/rocprofiler-systems/examples/causal/causal.cpp @@ -0,0 +1,112 @@ +#include "causal.hpp" +#include "impl.hpp" + +namespace +{ +std::chrono::duration t_ms; +std::chrono::duration slow_ms; +std::chrono::duration fast_ms; +} // namespace + +int +main(int argc, char** argv) +{ + uint64_t rseed = std::random_device{}(); + size_t nitr = 50; + double frac = 70; + int64_t slow_val = 200000000L; + size_t nsync = 1; + + if(argc > 1) frac = std::stod(argv[1]); + if(argc > 2) nitr = std::stoull(argv[2]); + if(argc > 3) rseed = std::stoul(argv[3]); + if(argc > 4) slow_val = std::stol(argv[4]); + if(argc > 5) nsync = std::stoull(argv[5]); + + nsync = std::min(std::max(nsync, 1), nitr); + int64_t fast_val = (frac / 100.0) * slow_val; + double rfrac = (fast_val / static_cast(slow_val)); + if(argc > 6) fast_val = std::stol(argv[6]); // argv[5] is nsync; an optional 7th argument overrides the derived fast value + + printf("\nFraction: %6.2f, iterations: %zu, random seed: %lu :: slow = %zu, " + "fast = %zu, expected ratio = %6.2f, sync every %lu iterations\n", + frac, nitr, rseed, slow_val, fast_val, rfrac * 100.0, nsync); + + auto _wait_barrier = pthread_barrier_t{}; + pthread_barrier_init(&_wait_barrier, nullptr, 3); + auto _thread_func = [nitr, nsync, &_wait_barrier](const auto& _func, auto* _timer, + auto _nsec, auto _nseed, + auto _nloop) { + pthread_barrier_wait(&_wait_barrier); + for(size_t i = 0; i < nitr; ++i) + { + auto _t = clock_type::now(); + _func(_nsec, _nseed, _nloop); + (*_timer) += (clock_type::now() - _t); + CAUSAL_PROGRESS_NAMED("iteration"); + if(i % nsync == (nsync - 1)) 
pthread_barrier_wait(&_wait_barrier); + } + }; + + auto _t = clock_type::now(); + auto _threads = std::vector{}; + _threads.emplace_back(_thread_func, SLOW_FUNC, &slow_ms, slow_val, rseed, 10000); + _threads.emplace_back(_thread_func, FAST_FUNC, &fast_ms, fast_val, rseed, 10000); + pthread_barrier_wait(&_wait_barrier); + for(size_t i = 0; i < nitr; ++i) + { + if(i == 0 || i + 1 == nitr || i % std::max(nitr / 5, size_t{ 1 }) == 0) // clamp the stride so nitr < 5 cannot cause a modulo by zero + (printf("executing iteration: %zu\n", i), fflush(stdout)); + if(i % nsync == (nsync - 1)) pthread_barrier_wait(&_wait_barrier); + } + for(auto& itr : _threads) + itr.join(); + + t_ms += clock_type::now() - _t; + auto rms = (fast_ms.count() / slow_ms.count()); + printf("slow_func() took %10.3f ms\n", slow_ms.count()); + printf("fast_func() took %10.3f ms\n", fast_ms.count()); + printf("total is %18.3f ms\n", t_ms.count()); + printf("ratio is %18.3f %s\n", 100.0 * rms, "%"); + printf("rdiff is %18.3f %s\n", 100.0 * (rms - rfrac), "%"); +} +// +// +// +void +rng_slow_func(int64_t n, uint64_t rseed) +{ + // clang-format off + while(rng_impl_func(n, rseed) != false) {} + // clang-format on +} +// +// +// +void +rng_fast_func(int64_t n, uint64_t rseed) +{ + // clang-format off + while(rng_impl_func(n, rseed) != true) {} + // clang-format on +} +// +// +// +void +cpu_slow_func(int64_t n, int nloop) +{ + // clang-format off + while(cpu_impl_func(n, nloop) != false) {} + // clang-format on +} +// +// +// +void +cpu_fast_func(int64_t n, int nloop) +{ + // clang-format off + while(cpu_impl_func(n, nloop) != true) {} + // clang-format on +} diff --git a/projects/rocprofiler-systems/examples/causal/causal.hpp b/projects/rocprofiler-systems/examples/causal/causal.hpp new file mode 100644 index 0000000000..65eb857b79 --- /dev/null +++ b/projects/rocprofiler-systems/examples/causal/causal.hpp @@ -0,0 +1,46 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#define CAUSAL_STR2(x) #x +#define CAUSAL_STR(x) CAUSAL_STR2(x) +#define CAUSAL_LABEL __FILE__ ":" CAUSAL_STR(__LINE__) + +#if defined(USE_OMNI) && USE_OMNI > 0 +# include +# define CAUSAL_PROGRESS ROCPROFSYS_CAUSAL_PROGRESS +# define CAUSAL_PROGRESS_NAMED(LABEL) ROCPROFSYS_CAUSAL_PROGRESS_NAMED(LABEL) +# define CAUSAL_BEGIN(LABEL) ROCPROFSYS_CAUSAL_BEGIN(LABEL) +# define CAUSAL_END(LABEL) ROCPROFSYS_CAUSAL_END(LABEL) +#elif defined(USE_COZ) && USE_COZ > 0 +# include +# define CAUSAL_PROGRESS COZ_PROGRESS_NAMED(CAUSAL_LABEL) +# define CAUSAL_PROGRESS_NAMED(LABEL) COZ_PROGRESS_NAMED(LABEL) +# define CAUSAL_BEGIN(LABEL) COZ_BEGIN(LABEL) +# define CAUSAL_END(LABEL) COZ_END(LABEL) +#else +# define CAUSAL_PROGRESS +# define CAUSAL_PROGRESS_NAMED(LABEL) +# define CAUSAL_BEGIN(LABEL) +# define CAUSAL_END(LABEL) +#endif diff --git a/projects/rocprofiler-systems/examples/causal/impl.cpp b/projects/rocprofiler-systems/examples/causal/impl.cpp new file mode 100644 index 0000000000..e43325ffb7 --- /dev/null +++ b/projects/rocprofiler-systems/examples/causal/impl.cpp @@ -0,0 +1,141 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "causal.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using mutex_t = std::timed_mutex; +using auto_lock_t = std::unique_lock; +using clock_type = std::chrono::high_resolution_clock; +using nanosec = std::chrono::nanoseconds; + +namespace +{ +inline __attribute__((always_inline)) int64_t +clock_tick() noexcept; + +template +inline __attribute__((always_inline)) Ret +clock_tick() noexcept; + +template +inline __attribute__((always_inline)) Tp +get_clock_now(clockid_t clock_id) noexcept; + +template +inline __attribute__((always_inline)) Tp +get_clock_cpu_now() noexcept; +} // namespace + +// +// This implementation works well for rocprof-sys +// while COZ makes poor predictions +// +template +bool +rng_impl_func(int64_t n, uint64_t rseed) +{ + int64_t _n = 0; + auto _rng = std::mt19937_64{ rseed }; + auto _dist = std::uniform_int_distribution{ 1, 1 }; + // clang-format off + while(_n < n) _n += _dist(_rng); + // clang-format on + return V; +} + +template bool rng_impl_func(int64_t, uint64_t); +template bool rng_impl_func(int64_t, uint64_t); + +// +// This implementation works well for COZ +// while rocprof-sys makes poor predictions +// +template +bool +cpu_impl_func(int64_t n, int nloop) +{ + auto _t = clock_type::now(); + auto _cpu_now = get_clock_cpu_now(); + auto _cpu_end = _cpu_now + n; + // clang-format off + while(get_clock_cpu_now() < _cpu_end) + { + 
for(volatile int i = 0; i < nloop; ++i) {} + CAUSAL_PROGRESS_NAMED("cpu_impl"); + } + // clang-format on + return V; +} + +template bool +cpu_impl_func(int64_t, int); +template bool +cpu_impl_func(int64_t, int); + +namespace +{ +int64_t +clock_tick() noexcept +{ + static int64_t _val = ::sysconf(_SC_CLK_TCK); + return _val; +} + +template +Ret +clock_tick() noexcept +{ + return static_cast(Precision::den) / static_cast(clock_tick()); +} + +template +Tp +get_clock_now(clockid_t clock_id) noexcept +{ + constexpr Tp factor = Precision::den / static_cast(std::nano::den); + struct timespec ts; + clock_gettime(clock_id, &ts); + return (ts.tv_sec * std::nano::den + ts.tv_nsec) * factor; +} + +template +Tp +get_clock_cpu_now() noexcept +{ + return get_clock_now(CLOCK_THREAD_CPUTIME_ID); +} +} // namespace diff --git a/projects/rocprofiler-systems/examples/causal/impl.hpp b/projects/rocprofiler-systems/examples/causal/impl.hpp new file mode 100644 index 0000000000..fdb4ceda1e --- /dev/null +++ b/projects/rocprofiler-systems/examples/causal/impl.hpp @@ -0,0 +1,97 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using mutex_t = std::timed_mutex; +using auto_lock_t = std::unique_lock; +using clock_type = std::chrono::high_resolution_clock; +using nanosec = std::chrono::nanoseconds; + +namespace +{ +template +inline void +consume_variables(Args&&...) +{} +} // namespace + +template +bool +rng_impl_func(int64_t n, uint64_t rseed); + +template +bool +cpu_impl_func(int64_t n, int nloop); + +void +rng_slow_func(int64_t n, uint64_t rseed) __attribute__((noinline)); + +void +rng_fast_func(int64_t n, uint64_t rseed) __attribute__((noinline)); + +void +cpu_slow_func(int64_t n, int nloop) __attribute__((noinline)); + +void +cpu_fast_func(int64_t n, int nloop) __attribute__((noinline)); + +#if USE_CPU > 0 +# define CPU_SLOW_FUNC(...) cpu_slow_func(__VA_ARGS__) +# define CPU_FAST_FUNC(...) cpu_fast_func(__VA_ARGS__) +#else +# define CPU_SLOW_FUNC(...) consume_variables(__VA_ARGS__) +# define CPU_FAST_FUNC(...) consume_variables(__VA_ARGS__) +#endif + +#if USE_RNG > 0 +# define RNG_SLOW_FUNC(...) rng_slow_func(__VA_ARGS__) +# define RNG_FAST_FUNC(...) rng_fast_func(__VA_ARGS__) +#else +# define RNG_SLOW_FUNC(...) consume_variables(__VA_ARGS__) +# define RNG_FAST_FUNC(...) 
consume_variables(__VA_ARGS__) +#endif + +#define SLOW_FUNC \ + [](auto _nsec_v, auto _nseed_v, auto _nloop_v) { \ + CPU_SLOW_FUNC(_nsec_v, _nloop_v); \ + RNG_SLOW_FUNC(_nsec_v / 5, _nseed_v); \ + } + +#define FAST_FUNC \ + [](auto _nsec_v, auto _nseed_v, auto _nloop_v) { \ + CPU_FAST_FUNC(_nsec_v, _nloop_v); \ + RNG_FAST_FUNC(_nsec_v / 5, _nseed_v); \ + } diff --git a/projects/rocprofiler-systems/examples/code-coverage/CMakeLists.txt b/projects/rocprofiler-systems/examples/code-coverage/CMakeLists.txt new file mode 100644 index 0000000000..07ecbd7873 --- /dev/null +++ b/projects/rocprofiler-systems/examples/code-coverage/CMakeLists.txt @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-code-coverage-example LANGUAGES CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") +string(REPLACE " " ";" _FLAGS "${CMAKE_CXX_FLAGS_DEBUG}") + +find_package(Threads REQUIRED) +add_executable(code-coverage code-coverage.cpp) +target_link_libraries(code-coverage PRIVATE Threads::Threads) +target_compile_options(code-coverage PRIVATE ${_FLAGS}) + +if(ROCPROFSYS_INSTALL_EXAMPLES) + install(TARGETS code-coverage DESTINATION bin COMPONENT rocprofiler-systems-examples) +endif() + +set(PYTHON_FILES code-coverage.py) + +find_package(Python3 COMPONENTS Interpreter) + +if(Python3_FOUND) + set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}") + + foreach(_FILE ${PYTHON_FILES}) + configure_file( + ${PROJECT_SOURCE_DIR}/${_FILE} + ${PROJECT_BINARY_DIR}/${_FILE} + @ONLY + ) + + if(ROCPROFSYS_INSTALL_EXAMPLES) + install( + PROGRAMS ${PROJECT_BINARY_DIR}/${_FILE} + DESTINATION bin + COMPONENT rocprofiler-systems-examples + ) + endif() + endforeach() +endif() diff --git 
a/projects/rocprofiler-systems/examples/code-coverage/code-coverage.cpp b/projects/rocprofiler-systems/examples/code-coverage/code-coverage.cpp new file mode 100644 index 0000000000..2e3c37c3df --- /dev/null +++ b/projects/rocprofiler-systems/examples/code-coverage/code-coverage.cpp @@ -0,0 +1,88 @@ + +#include +#include +#include +#include +#include +#include + +#define NOINLINE __attribute__((noinline)) + +std::atomic total{ 0 }; + +long +fib(long n) NOINLINE; + +void +run_real(size_t nitr, long) NOINLINE; + +void +run_fake(size_t nitr, long) NOINLINE; + +int +main(int argc, char** argv) +{ + using exec_t = void (*)(size_t, long); + + std::string _name = argv[0]; + auto _pos = _name.find_last_of('/'); + if(_pos != std::string::npos) _name = _name.substr(_pos + 1); + + size_t nthread = std::min(16, std::thread::hardware_concurrency()); + size_t nitr = 5000; + long nfib = 10; + + if(argc > 1) nfib = atol(argv[1]); + if(argc > 2) nthread = atol(argv[2]); + if(argc > 3) nitr = atol(argv[3]); + + exec_t _exec = &run_real; + + // ensure that compiler cannot optimize run_fake away + if(std::getenv("CODE_COVERAGE_USE_FAKE") != nullptr) _exec = &run_fake; + + printf("[%s] Threads: %zu\n[%s] Iterations: %zu\n[%s] fibonacci(%li)...\n", + _name.c_str(), nthread, _name.c_str(), nitr, _name.c_str(), nfib); + + std::vector threads{}; + for(size_t i = 0; i < nthread; ++i) + { + size_t _nitr = ((i % 2) == 1) ? (nitr - (0.1 * nitr)) : (nitr + (0.1 * nitr)); + _nitr = std::max(_nitr, 1); + threads.emplace_back(_exec, _nitr, nfib); + } + + auto _nitr = std::max(nitr - 0.25 * nitr, 1); + (*_exec)(_nitr, nfib - 0.1 * nfib); + for(auto& itr : threads) + itr.join(); + + printf("[%s] fibonacci(%li) x %lu = %li\n", _name.c_str(), nfib, nthread, + total.load()); + + return 0; +} + +long +fib(long n) +{ + return (n < 2) ? 
n : fib(n - 1) + fib(n - 2); +} + +void +run_real(size_t nitr, long n) +{ + long local = 0; + for(size_t i = 0; i < nitr; ++i) + local += fib(n); + total += local; +} + +void +run_fake(size_t nitr, long n) +{ + long local = 0; + for(size_t i = 0; i < nitr; ++i) + local += fib(n); + total += local; +} diff --git a/projects/rocprofiler-systems/examples/code-coverage/code-coverage.py b/projects/rocprofiler-systems/examples/code-coverage/code-coverage.py new file mode 100644 index 0000000000..9dae8beefb --- /dev/null +++ b/projects/rocprofiler-systems/examples/code-coverage/code-coverage.py @@ -0,0 +1,54 @@ +#!@PYTHON_EXECUTABLE@ + +import rocprofsys +import argparse + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "-i", + "--input", + type=str, + nargs="+", + help="Input code coverage", + default=None, + required=True, + ) + parser.add_argument( + "-o", + "--output", + type=str, + help="Output code coverage", + default=None, + required=True, + ) + + args = parser.parse_args() + + data = None + for itr in args.input: + _summary, _details = rocprofsys.coverage.load(itr) + if data is None: + data = _details + else: + data = rocprofsys.coverage.concat(data, _details) + + summary = rocprofsys.coverage.get_summary(data) + top = rocprofsys.coverage.get_top(data) + bottom = rocprofsys.coverage.get_bottom(data) + + print("Top code coverage:") + for itr in top: + print( + f" {itr.count} | {itr.function} | {itr.module}:{itr.line} | {itr.source}" + ) + + print("Bottom code coverage:") + for itr in bottom: + print( + f" {itr.count} | {itr.function} | {itr.module}:{itr.line} | {itr.source}" + ) + + print("\nSaving code coverage") + rocprofsys.coverage.save(summary, data, args.output) diff --git a/projects/rocprofiler-systems/examples/fork/CMakeLists.txt b/projects/rocprofiler-systems/examples/fork/CMakeLists.txt new file mode 100644 index 0000000000..3c8a989eb4 --- /dev/null +++ b/projects/rocprofiler-systems/examples/fork/CMakeLists.txt 
@@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-fork LANGUAGES CXX) + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") +string(REPLACE " " ";" _FLAGS "${CMAKE_CXX_FLAGS_DEBUG}") + +find_package(Threads REQUIRED) +find_package(rocprofiler-systems REQUIRED COMPONENTS user) +add_executable(fork-example fork.cpp) +target_link_libraries( + fork-example + PRIVATE Threads::Threads rocprofiler-systems::rocprofiler-systems +) +target_compile_options(fork-example PRIVATE ${_FLAGS}) + +if(ROCPROFSYS_INSTALL_EXAMPLES) + install(TARGETS fork-example DESTINATION bin COMPONENT rocprofiler-systems-examples) +endif() diff --git a/projects/rocprofiler-systems/examples/fork/fork.cpp b/projects/rocprofiler-systems/examples/fork/fork.cpp new file mode 100644 index 0000000000..fda89412b0 --- /dev/null +++ b/projects/rocprofiler-systems/examples/fork/fork.cpp @@ -0,0 +1,137 @@ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void +print_info(const char* _name) +{ + fflush(stdout); + fflush(stderr); + printf("[%s] pid = %i, ppid = %i\n", _name, getpid(), getppid()); + fflush(stdout); + fflush(stderr); +} + +int +run(const char* _name, int nchildren) +{ + auto _barrier = pthread_barrier_t{}; + auto _threads = std::vector{}; + auto _children = std::vector{}; + _children.resize(nchildren, 0); + pthread_barrier_init(&_barrier, nullptr, nchildren + 1); + for(int i = 0; i < nchildren; ++i) + { + rocprofsys_user_push_region("launch_child"); + auto _run = [&_barrier, &_children, i, _name](uint64_t _nsec) { + pthread_barrier_wait(&_barrier); + _children.at(i) = fork(); + if(_children.at(i) == 0) + { + // child code + print_info(_name); + printf("[%s][%i] child job starting...\n", _name, getpid()); + auto _sleep = [=]() { + rocprofsys_user_push_region("child_process_child_thread"); + std::this_thread::sleep_for(std::chrono::seconds{ _nsec }); + 
rocprofsys_user_pop_region("child_process_child_thread"); + }; + rocprofsys_user_push_region("child_process"); + std::thread{ _sleep }.join(); + rocprofsys_user_push_region("child_process"); + printf("[%s][%i] child job complete\n", _name, getpid()); + exit(EXIT_SUCCESS); + } + else + { + pthread_barrier_wait(&_barrier); + } + }; + _threads.emplace_back(_run, i + 1); + rocprofsys_user_pop_region("launch_child"); + } + + // all child threads should start executing their fork once this returns + pthread_barrier_wait(&_barrier); + // wait for the threads to successfully fork + pthread_barrier_wait(&_barrier); + + rocprofsys_user_push_region("wait_for_children"); + + int _status = 0; + pid_t _wait_pid = 0; + // parent waits for all the child processes + for(auto& itr : _children) + { + while(itr == 0) + { + } + printf("[%s][%i] performing waitpid(%i, ...)\n", _name, getpid(), itr); + while((_wait_pid = waitpid(itr, &_status, WUNTRACED | WNOHANG)) <= 0) + { + if(_wait_pid == 0) continue; + + printf("[%s][%i] returned from waitpid(%i) with pid = %i (status = %i) :: ", + _name, getpid(), itr, _wait_pid, _status); + if(WIFEXITED(_status)) + { + printf("exited, status=%d\n", WEXITSTATUS(_status)); + } + else if(WIFSIGNALED(_status)) + { + printf("killed by signal %d\n", WTERMSIG(_status)); + } + else if(WIFSTOPPED(_status)) + { + printf("stopped by signal %d\n", WSTOPSIG(_status)); + } + else if(WIFCONTINUED(_status)) + { + printf("continued\n"); + } + else + { + printf("unknown\n"); + } + } + } + + printf("[%s][%i] joining threads ...\n", _name, getpid()); + for(auto& itr : _threads) + itr.join(); + + rocprofsys_user_pop_region("wait_for_children"); + + printf("[%s][%i] returning (error code: %i) ...\n", _name, getpid(), _status); + return _status; +} + +int +main(int argc, char** argv) +{ + int _nfork = 4; + int _nrep = 1; + if(argc > 1) _nfork = std::stoi(argv[1]); + if(argc > 2) _nrep = std::stoi(argv[2]); + + print_info(argv[0]); + for(int i = 0; i < _nrep; ++i) + { + 
auto _ec = run(argv[0], _nfork); + if(_ec != 0) return _ec; + } + + printf("[%s][%i] job complete\n", argv[0], getpid()); + return EXIT_SUCCESS; +} diff --git a/projects/rocprofiler-systems/examples/jpegdecode/CMakeLists.txt b/projects/rocprofiler-systems/examples/jpegdecode/CMakeLists.txt new file mode 100644 index 0000000000..e92c8f5e34 --- /dev/null +++ b/projects/rocprofiler-systems/examples/jpegdecode/CMakeLists.txt @@ -0,0 +1,147 @@ +################################################################################ +# Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +################################################################################ + +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +# This example requires hip and rocjpeg. +find_package(HIP QUIET) + +if(NOT HIP_FOUND) + message(WARNING "hip is not found. 
Skip jpegdecode example.") + return() +endif() + +# Set AMD Clang as default compiler +if(NOT DEFINED CMAKE_CXX_COMPILER) + set(CMAKE_C_COMPILER ${ROCmVersion_DIR}/bin/amdclang) + set(CMAKE_CXX_COMPILER ${ROCmVersion_DIR}/bin/amdclang++) +endif() + +project(rocprofiler-systems-jpegdecode-example) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED On) + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake) +list(APPEND CMAKE_PREFIX_PATH ${ROCmVersion_DIR}/lib/cmake ${ROCmVersion_DIR}) +list(APPEND CMAKE_PREFIX_PATH ${ROCmVersion_DIR}/hip ${ROCmVersion_DIR}) +list(APPEND CMAKE_MODULE_PATH ${ROCmVersion_DIR}/share/rocjpeg/cmake) + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") +string(REPLACE " " ";" _FLAGS "${CMAKE_CXX_FLAGS_DEBUG}") + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +find_package(rocjpeg QUIET) +find_package(rocprofiler-register QUIET) + +# Copy image files to build directory +function(copy_image_files_and_make_copies) + if(EXISTS "${ROCmVersion_DIR}/share/rocjpeg/images") + if(NOT EXISTS "${CMAKE_BINARY_DIR}/images") + file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/images") + endif() + + file(GLOB_RECURSE image_files "${ROCmVersion_DIR}/share/rocjpeg/images/*") + file(COPY ${image_files} DESTINATION ${CMAKE_BINARY_DIR}/images) + set(NUM_COPIES 20) + + # Loop over each file and make additional copies + foreach(file ${image_files}) + get_filename_component(filename ${file} NAME) + foreach(i RANGE 1 ${NUM_COPIES}) + file( + COPY ${file} + DESTINATION ${CMAKE_BINARY_DIR}/images/${filename}_copy${i}.jpg + ) + endforeach() + endforeach() + else() + message( + AUTHOR_WARNING + "Source directory ${ROCmVersion_DIR}/share/rocjpeg/images does not exist" + ) + endif() +endfunction() + +# threads +find_package(Threads REQUIRED) + +if(HIP_FOUND AND rocjpeg_FOUND 
AND Threads_FOUND AND rocprofiler-register_FOUND) + # HIP + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::host) + # threads + set(THREADS_PREFER_PTHREAD_FLAG ON) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} Threads::Threads) + # std filesystem + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs) + # rocprofiler-register + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocprofiler-register::rocprofiler-register) + + # rocJPEG + message(STATUS "RocJPEG library found: ${rocjpeg_LIBRARIES}") + include_directories(${rocjpeg_INCLUDE_DIR}) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocjpeg::rocjpeg) + list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecodeperf.cpp) + add_executable(jpegdecode ${SOURCES}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17") + target_link_libraries(jpegdecode ${LINK_LIBRARY_LIST}) + target_compile_options(jpegdecode PRIVATE ${_FLAGS}) + copy_image_files_and_make_copies() + + if(ROCPROFSYS_INSTALL_EXAMPLES) + install(TARGETS jpegdecode DESTINATION bin COMPONENT rocprofiler-systems-examples) + install( + DIRECTORY ${CMAKE_BINARY_DIR}/images/ # trailing slash installs the contents, not the folder itself + DESTINATION share/rocprofiler-systems/tests/images + COMPONENT rocprofiler-systems-examples + ) + endif() +else() + message( + "-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!" + ) + if(NOT HIP_FOUND) + message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!") + endif() + if(NOT rocjpeg_FOUND) + message(WARNING "-- ERROR!: rocJPEG Not Found! - please install rocJPEG!") + endif() + if(NOT Threads_FOUND) + message(FATAL_ERROR "-- ERROR!: Threads Not Found! - please install Threads!") + endif() + if(NOT rocprofiler-register_FOUND) + message( + FATAL_ERROR + "-- ERROR!: rocprofiler-register Not Found! - please install rocprofiler-register!" 
+ ) + endif() +endif() diff --git a/projects/rocprofiler-systems/examples/jpegdecode/jpegdecodeperf.cpp b/projects/rocprofiler-systems/examples/jpegdecode/jpegdecodeperf.cpp new file mode 100644 index 0000000000..d5bd4ffdb4 --- /dev/null +++ b/projects/rocprofiler-systems/examples/jpegdecode/jpegdecodeperf.cpp @@ -0,0 +1,422 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rocjpeg_samples_utils.h" + +struct DecodeInfo +{ + std::vector file_paths; + RocJpegHandle rocjpeg_handle; + std::vector rocjpeg_stream_handles; + uint64_t num_decoded_images; + double images_per_sec; + double image_size_in_mpixels_per_sec; + uint64_t num_bad_jpegs; + uint64_t num_jpegs_with_411_subsampling; + uint64_t num_jpegs_with_unknown_subsampling; + uint64_t num_jpegs_with_unsupported_resolution; +}; + +/** + * @brief Decodes a batch of JPEG images and optionally saves the decoded images. 
+ * + * @param decode_info parameters info for decoding a batch of jpeg images. + * @param rocjpeg_utils Utility functions for RocJpeg operations. + * @param decode_params Parameters for decoding the JPEG images (output_format, + * crop_rectangle) + * @param save_images A boolean flag indicating whether to save the decoded images. + * @param output_file_path The file path where the decoded images will be saved. + * @param batch_size The number of images to be processed in each batch. + */ +void +DecodeImages(DecodeInfo& decode_info, RocJpegUtils rocjpeg_utils, + RocJpegDecodeParams& decode_params, bool save_images, + std::string& output_file_path, int batch_size, int device_id) +{ + bool is_roi_valid = false; + uint32_t roi_width; + uint32_t roi_height; + uint8_t num_components; + uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {}; + std::string chroma_sub_sampling = ""; + uint32_t num_channels = 0; + double image_size_in_mpixels_all = 0; + double total_decode_time_in_milli_sec = 0; + int current_batch_size = 0; + std::vector> batch_images(batch_size); + std::vector> widths( + batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); + std::vector> heights( + batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); + std::vector> prior_channel_sizes( + batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); + std::vector subsamplings(batch_size); + std::vector output_images(batch_size); + std::vector decode_params_batch(batch_size, decode_params); + std::vector base_file_names(batch_size); + std::vector rocjpeg_stream_handles(batch_size); + std::vector temp_widths(ROCJPEG_MAX_COMPONENT, 0); + std::vector temp_heights(ROCJPEG_MAX_COMPONENT, 0); + RocJpegChromaSubsampling temp_subsampling; + std::string temp_base_file_name; + + CHECK_HIP(hipSetDevice(device_id)); + for(int i = 0; i < decode_info.file_paths.size(); i += batch_size) + { + int batch_end = + std::min(i + batch_size, static_cast(decode_info.file_paths.size())); + for(int j = i; j < batch_end; j++) + { + int index = j - 
i; + + temp_base_file_name = decode_info.file_paths[j].substr( + decode_info.file_paths[j].find_last_of("/\\") + 1); + // Read an image from disk. + std::ifstream input(decode_info.file_paths[j].c_str(), + std::ios::in | std::ios::binary | std::ios::ate); + if(!(input.is_open())) + { + std::cerr << "ERROR: Cannot open image: " << decode_info.file_paths[j] + << std::endl; + return; + } + // Get the size + std::streamsize file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if(batch_images[index].size() < file_size) + { + batch_images[index].resize(file_size); + } + if(!input.read(batch_images[index].data(), file_size)) + { + std::cerr << "ERROR: Cannot read from file: " << decode_info.file_paths[j] + << std::endl; + return; + } + + RocJpegStatus rocjpeg_status = + rocJpegStreamParse(reinterpret_cast(batch_images[index].data()), + file_size, decode_info.rocjpeg_stream_handles[index]); + if(rocjpeg_status != ROCJPEG_STATUS_SUCCESS) + { + decode_info.num_bad_jpegs++; + std::cerr << "Skipping decoding input file: " << decode_info.file_paths[j] + << std::endl; + continue; + } + + CHECK_ROCJPEG(rocJpegGetImageInfo(decode_info.rocjpeg_handle, + decode_info.rocjpeg_stream_handles[index], + &num_components, &temp_subsampling, + temp_widths.data(), temp_heights.data())); + + rocjpeg_utils.GetChromaSubsamplingStr(temp_subsampling, chroma_sub_sampling); + if(temp_widths[0] < 64 || temp_heights[0] < 64) + { + decode_info.num_jpegs_with_unsupported_resolution++; + continue; + } + + if(temp_subsampling == ROCJPEG_CSS_411 || + temp_subsampling == ROCJPEG_CSS_UNKNOWN) + { + if(temp_subsampling == ROCJPEG_CSS_411) + { + decode_info.num_jpegs_with_411_subsampling++; + } + if(temp_subsampling == ROCJPEG_CSS_UNKNOWN) + { + decode_info.num_jpegs_with_unknown_subsampling++; + } + continue; + } + + if(rocjpeg_utils.GetChannelPitchAndSizes( + decode_params_batch[index], temp_subsampling, temp_widths.data(), + temp_heights.data(), num_channels, 
output_images[current_batch_size], + channel_sizes)) + { + std::cerr << "ERROR: Failed to get the channel pitch and sizes" + << std::endl; + return; + } + + // allocate memory for each channel and reuse them if the sizes remain + // unchanged for a new image. + for(int n = 0; n < num_channels; n++) + { + if(prior_channel_sizes[current_batch_size][n] != channel_sizes[n]) + { + if(output_images[current_batch_size].channel[n] != nullptr) + { + CHECK_HIP(hipFree( + (void*) output_images[current_batch_size].channel[n])); + output_images[current_batch_size].channel[n] = nullptr; + } + CHECK_HIP(hipMalloc(&output_images[current_batch_size].channel[n], + channel_sizes[n])); + prior_channel_sizes[current_batch_size][n] = channel_sizes[n]; + } + } + + rocjpeg_stream_handles[current_batch_size] = + decode_info.rocjpeg_stream_handles[index]; + subsamplings[current_batch_size] = temp_subsampling; + widths[current_batch_size] = temp_widths; + heights[current_batch_size] = temp_heights; + base_file_names[current_batch_size] = temp_base_file_name; + current_batch_size++; + } + + double time_per_batch_in_milli_sec = 0; + if(current_batch_size > 0) + { + auto start_time = std::chrono::high_resolution_clock::now(); + CHECK_ROCJPEG(rocJpegDecodeBatched( + decode_info.rocjpeg_handle, rocjpeg_stream_handles.data(), + current_batch_size, decode_params_batch.data(), output_images.data())); + auto end_time = std::chrono::high_resolution_clock::now(); + time_per_batch_in_milli_sec = + std::chrono::duration(end_time - start_time).count(); + } + + double image_size_in_mpixels = 0; + for(int b = 0; b < current_batch_size; b++) + { + image_size_in_mpixels += (static_cast(widths[b][0]) * + static_cast(heights[b][0]) / 1000000); + } + + decode_info.num_decoded_images += current_batch_size; + + if(save_images) + { + for(int b = 0; b < current_batch_size; b++) + { + std::string image_save_path = output_file_path; + // if ROI is present, need to pass roi_width and roi_height + roi_width = 
decode_params_batch[b].crop_rectangle.right - + decode_params_batch[b].crop_rectangle.left; + roi_height = decode_params_batch[b].crop_rectangle.bottom - + decode_params_batch[b].crop_rectangle.top; + is_roi_valid = (roi_width > 0 && roi_height > 0 && + roi_width <= widths[b][0] && roi_height <= heights[b][0]) + ? true + : false; + uint32_t width = is_roi_valid ? roi_width : widths[b][0]; + uint32_t height = is_roi_valid ? roi_height : heights[b][0]; + rocjpeg_utils.GetOutputFileExt(decode_params.output_format, + base_file_names[b], width, height, + subsamplings[b], image_save_path); + rocjpeg_utils.SaveImage(image_save_path, &output_images[b], width, height, + subsamplings[b], decode_params.output_format); + } + } + + total_decode_time_in_milli_sec += time_per_batch_in_milli_sec; + image_size_in_mpixels_all += image_size_in_mpixels; + + current_batch_size = 0; + } + + double avg_time_per_image = + decode_info.num_decoded_images > 0 + ? total_decode_time_in_milli_sec / decode_info.num_decoded_images + : 0; + decode_info.images_per_sec = avg_time_per_image > 0 ? 1000 / avg_time_per_image : 0; + decode_info.image_size_in_mpixels_per_sec = decode_info.num_decoded_images > 0 + ? 
decode_info.images_per_sec * + image_size_in_mpixels_all / + decode_info.num_decoded_images + : 0; + + for(auto& it : output_images) + { + for(int i = 0; i < ROCJPEG_MAX_COMPONENT; i++) + { + if(it.channel[i] != nullptr) + { + CHECK_HIP(hipFree((void*) it.channel[i])); + it.channel[i] = nullptr; + } + } + } +} + +int +main(int argc, char** argv) +{ + int device_id = 0; + bool save_images = false; + int num_threads = 1; + int batch_size = 1; + bool is_dir = false; + bool is_file = false; + RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE; + RocJpegDecodeParams decode_params = {}; + RocJpegUtils rocjpeg_utils; + std::string input_path, output_file_path; + std::vector file_paths = {}; + std::vector decode_info_per_thread; + + RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, + rocjpeg_backend, decode_params, &num_threads, + &batch_size, argc, argv); + if(!RocJpegUtils::GetFilePaths(input_path, file_paths, is_dir, is_file)) + { + std::cerr << "ERROR: Failed to get input file paths!" << std::endl; + return EXIT_FAILURE; + } + if(!RocJpegUtils::InitHipDevice(device_id)) + { + std::cerr << "ERROR: Failed to initialize HIP!" 
<< std::endl; + return EXIT_FAILURE; + } + + if(num_threads > file_paths.size()) + { + num_threads = file_paths.size(); + } + + decode_info_per_thread.resize(num_threads); + + for(int i = 0; i < num_threads; i++) + { + CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, + &decode_info_per_thread[i].rocjpeg_handle)); + decode_info_per_thread[i].rocjpeg_stream_handles.resize(batch_size); + for(auto j = 0; j < batch_size; j++) + { + CHECK_ROCJPEG(rocJpegStreamCreate( + &decode_info_per_thread[i].rocjpeg_stream_handles[j])); + } + decode_info_per_thread[i].num_decoded_images = 0; + decode_info_per_thread[i].images_per_sec = 0; + decode_info_per_thread[i].image_size_in_mpixels_per_sec = 0; + decode_info_per_thread[i].num_bad_jpegs = 0; + decode_info_per_thread[i].num_jpegs_with_411_subsampling = 0; + decode_info_per_thread[i].num_jpegs_with_unknown_subsampling = 0; + decode_info_per_thread[i].num_jpegs_with_unsupported_resolution = 0; + } + + ThreadPool thread_pool(num_threads); + + size_t files_per_thread = file_paths.size() / num_threads; + size_t remaining_files = file_paths.size() % num_threads; + size_t start_index = 0; + for(int i = 0; i < num_threads; i++) + { + size_t end_index = start_index + files_per_thread + (i < remaining_files ? 1 : 0); + decode_info_per_thread[i].file_paths.assign(file_paths.begin() + start_index, + file_paths.begin() + end_index); + start_index = end_index; + } + + std::cout << "Decoding started with " << num_threads << " threads, please wait!" 
+ << std::endl; + for(int i = 0; i < num_threads; ++i) + { + thread_pool.ExecuteJob( + std::bind(DecodeImages, std::ref(decode_info_per_thread[i]), rocjpeg_utils, + std::ref(decode_params), save_images, std::ref(output_file_path), + batch_size, device_id)); + } + thread_pool.JoinThreads(); + + uint64_t total_decoded_images = 0; + double total_images_per_sec = 0; + double total_image_size_in_mpixels_per_sec = 0; + uint64_t total_num_bad_jpegs = 0; + uint64_t total_num_jpegs_with_411_subsampling = 0; + uint64_t total_num_jpegs_with_unknown_subsampling = 0; + uint64_t total_num_jpegs_with_unsupported_resolution = 0; + + for(auto i = 0; i < num_threads; i++) + { + total_decoded_images += decode_info_per_thread[i].num_decoded_images; + total_image_size_in_mpixels_per_sec += + decode_info_per_thread[i].image_size_in_mpixels_per_sec; + total_images_per_sec += decode_info_per_thread[i].images_per_sec; + total_num_bad_jpegs += decode_info_per_thread[i].num_bad_jpegs; + total_num_jpegs_with_411_subsampling += + decode_info_per_thread[i].num_jpegs_with_411_subsampling; + total_num_jpegs_with_unknown_subsampling += + decode_info_per_thread[i].num_jpegs_with_unknown_subsampling; + total_num_jpegs_with_unsupported_resolution += + decode_info_per_thread[i].num_jpegs_with_unsupported_resolution; + } + + std::cout << "Total decoded images: " << total_decoded_images << std::endl; + if(total_num_bad_jpegs || total_num_jpegs_with_411_subsampling || + total_num_jpegs_with_unknown_subsampling || + total_num_jpegs_with_unsupported_resolution) + { + std::cout << "Total skipped images: " + << total_num_bad_jpegs + total_num_jpegs_with_411_subsampling + + total_num_jpegs_with_unknown_subsampling + + total_num_jpegs_with_unsupported_resolution; + if(total_num_bad_jpegs) + { + std::cout << " ,total images that cannot be parsed: " << total_num_bad_jpegs; + } + if(total_num_jpegs_with_411_subsampling) + { + std::cout << " ,total images with YUV 4:1:1 chroam subsampling: " + << 
total_num_jpegs_with_411_subsampling; + } + if(total_num_jpegs_with_unknown_subsampling) + { + std::cout << " ,total images with unknwon chroam subsampling: " + << total_num_jpegs_with_unknown_subsampling; + } + if(total_num_jpegs_with_unsupported_resolution) + { + std::cout << " ,total images with unsupported_resolution: " + << total_num_jpegs_with_unsupported_resolution; + } + std::cout << std::endl; + } + + if(total_decoded_images > 0) + { + std::cout << "Average processing time per image (ms): " + << 1000 / total_images_per_sec << std::endl; + std::cout << "Average decoded images per sec (Images/Sec): " + << total_images_per_sec << std::endl; + std::cout << "Average decoded images size (Mpixels/Sec): " + << total_image_size_in_mpixels_per_sec << std::endl; + } + + for(int i = 0; i < num_threads; i++) + { + CHECK_ROCJPEG(rocJpegDestroy(decode_info_per_thread[i].rocjpeg_handle)); + for(auto j = 0; j < batch_size; j++) + { + CHECK_ROCJPEG(rocJpegStreamDestroy( + decode_info_per_thread[i].rocjpeg_stream_handles[j])); + } + } + + std::cout << "Decoding completed!" << std::endl; + return EXIT_SUCCESS; +} diff --git a/projects/rocprofiler-systems/examples/jpegdecode/rocjpeg_samples_utils.h b/projects/rocprofiler-systems/examples/jpegdecode/rocjpeg_samples_utils.h new file mode 100644 index 0000000000..9c7b2b2fec --- /dev/null +++ b/projects/rocprofiler-systems/examples/jpegdecode/rocjpeg_samples_utils.h @@ -0,0 +1,903 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef ROC_JPEG_SAMPLES_COMMON +#define ROC_JPEG_SAMPLES_COMMON +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() +# include +namespace fs = std::filesystem; +#else +# include +namespace fs = std::experimental::filesystem; +#endif +#include +#include + +#define CHECK_ROCJPEG(call) \ + { \ + RocJpegStatus rocjpeg_status = (call); \ + if(rocjpeg_status != ROCJPEG_STATUS_SUCCESS) \ + { \ + std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) \ + << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +#define CHECK_HIP(call) \ + { \ + hipError_t hip_status = (call); \ + if(hip_status != hipSuccess) \ + { \ + std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ \ + << ":" << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +/** + * @class RocJpegUtils + * @brief Utility class for rocJPEG samples. + * + * This class provides utility functions for rocJPEG samples, such as parsing command line + * arguments, getting file paths, initializing HIP device, getting chroma subsampling + * string, getting channel pitch and sizes, getting output file extension, and saving + * images. + */ +class RocJpegUtils +{ +public: + /** + * @brief Parses the command line arguments. + * + * This function parses the command line arguments and sets the corresponding + * variables. + * + * @param input_path The input path. + * @param output_file_path The output file path. + * @param save_images Flag indicating whether to save images. + * @param device_id The device ID. + * @param rocjpeg_backend The rocJPEG backend. + * @param decode_params The rocJPEG decode parameters. + * @param num_threads The number of threads. + * @param crop The crop rectangle. + * @param argc The number of command line arguments. + * @param argv The command line arguments. 
+ */ + static void ParseCommandLine(std::string& input_path, std::string& output_file_path, + bool& save_images, int& device_id, + RocJpegBackend& rocjpeg_backend, + RocJpegDecodeParams& decode_params, int* num_threads, + int* batch_size, int argc, char* argv[]) + { + if(argc <= 1) + { + ShowHelpAndExit("", num_threads != nullptr, batch_size != nullptr); + } + for(int i = 1; i < argc; i++) + { + if(!strcmp(argv[i], "-h")) + { + ShowHelpAndExit("", num_threads != nullptr, batch_size != nullptr); + } + if(!strcmp(argv[i], "-i")) + { + if(++i == argc) + { + ShowHelpAndExit("-i", num_threads != nullptr, batch_size != nullptr); + } + input_path = argv[i]; + continue; + } + if(!strcmp(argv[i], "-o")) + { + if(++i == argc) + { + ShowHelpAndExit("-o", num_threads != nullptr, batch_size != nullptr); + } + output_file_path = argv[i]; + save_images = true; + continue; + } + if(!strcmp(argv[i], "-d")) + { + if(++i == argc) + { + ShowHelpAndExit("-d", num_threads != nullptr, batch_size != nullptr); + } + device_id = atoi(argv[i]); + continue; + } + if(!strcmp(argv[i], "-be")) + { + if(++i == argc) + { + ShowHelpAndExit("-be", num_threads != nullptr, batch_size != nullptr); + } + rocjpeg_backend = static_cast(atoi(argv[i])); + continue; + } + if(!strcmp(argv[i], "-fmt")) + { + if(++i == argc) + { + ShowHelpAndExit("-fmt", num_threads != nullptr, + batch_size != nullptr); + } + std::string selected_output_format = argv[i]; + if(selected_output_format == "native") + { + decode_params.output_format = ROCJPEG_OUTPUT_NATIVE; + } + else if(selected_output_format == "yuv_planar") + { + decode_params.output_format = ROCJPEG_OUTPUT_YUV_PLANAR; + } + else if(selected_output_format == "y") + { + decode_params.output_format = ROCJPEG_OUTPUT_Y; + } + else if(selected_output_format == "rgb") + { + decode_params.output_format = ROCJPEG_OUTPUT_RGB; + } + else if(selected_output_format == "rgb_planar") + { + decode_params.output_format = ROCJPEG_OUTPUT_RGB_PLANAR; + } + else + { + 
ShowHelpAndExit(argv[i], num_threads != nullptr); + } + continue; + } + if(!strcmp(argv[i], "-t")) + { + if(++i == argc) + { + ShowHelpAndExit("-t", num_threads != nullptr, batch_size != nullptr); + } + if(num_threads != nullptr) + { + *num_threads = atoi(argv[i]); + if(*num_threads <= 0 || *num_threads > 32) + { + ShowHelpAndExit(argv[i], num_threads != nullptr, + batch_size != nullptr); + } + } + continue; + } + if(!strcmp(argv[i], "-b")) + { + if(++i == argc) + { + ShowHelpAndExit("-b", num_threads != nullptr, batch_size != nullptr); + } + if(batch_size != nullptr) *batch_size = atoi(argv[i]); + continue; + } + if(!strcmp(argv[i], "-crop")) // expects left,top,right,bottom; width and height must be even + { + if(++i == argc || 4 != sscanf(argv[i], "%hd,%hd,%hd,%hd", + &decode_params.crop_rectangle.left, + &decode_params.crop_rectangle.top, + &decode_params.crop_rectangle.right, + &decode_params.crop_rectangle.bottom)) + { + ShowHelpAndExit("-crop"); + } + if((decode_params.crop_rectangle.right - + decode_params.crop_rectangle.left) % + 2 != + 0 || + (decode_params.crop_rectangle.bottom - + decode_params.crop_rectangle.top) % + 2 != + 0) + { + std::cout << "output crop rectangle must have width and height of " + "even numbers" + << std::endl; + exit(1); + } + continue; + } + ShowHelpAndExit(argv[i], num_threads != nullptr, batch_size != nullptr); + } + } + + /** + * Checks if a file is a JPEG file. + * + * @param filePath The path to the file to be checked. + * @return True if the file is a JPEG file, false otherwise. + */ + static bool IsJPEG(const std::string& filePath) + { + std::ifstream file(filePath, std::ios::binary); + if(!file.is_open()) + { + std::cerr << "Failed to open file: " << filePath << std::endl; + return false; + } + + unsigned char buffer[2] = {}; // zeroed so a short read cannot match the SOI marker + file.read(reinterpret_cast(buffer), 2); + file.close(); + + // The first two bytes of every JPEG stream are always 0xFFD8, which represents + // the Start of Image (SOI) marker. 
+ return buffer[0] == 0xFF && buffer[1] == 0xD8; + } + + /** + * @brief Gets the file paths. + * + * This function gets the file paths based on the input path and sets the + * corresponding variables. + * + * @param input_path The input path. + * @param file_paths The vector to store the file paths. + * @param is_dir Flag indicating whether the input path is a directory. + * @param is_file Flag indicating whether the input path is a file. + * @return True if successful, false otherwise. + */ + static bool GetFilePaths(std::string& input_path, + std::vector& file_paths, bool& is_dir, + bool& is_file) + { + std::cout << "Reading images from disk, please wait!" << std::endl; + if(!fs::exists(input_path)) + { + std::cerr << "ERROR: the input path does not exist!" << std::endl; + return false; + } + is_dir = fs::is_directory(input_path); + is_file = fs::is_regular_file(input_path); + if(is_dir) + { + for(const auto& entry : fs::recursive_directory_iterator(input_path)) + { + if(fs::is_regular_file(entry) && IsJPEG(entry.path().string())) + { + file_paths.push_back(entry.path().string()); + } + } + } + else if(is_file && IsJPEG(input_path)) + { + file_paths.push_back(input_path); + } + else + { + std::cerr << "ERROR: the input path does not contain JPEG files!" + << std::endl; + return false; + } + return true; + } + + /** + * @brief Initializes the HIP device. + * + * This function initializes the HIP device with the specified device ID. + * + * @param device_id The device ID. + * @return True if successful, false otherwise. + */ + static bool InitHipDevice(int device_id) + { + int num_devices; + hipDeviceProp_t hip_dev_prop; + CHECK_HIP(hipGetDeviceCount(&num_devices)); + if(num_devices < 1) + { + std::cerr << "ERROR: didn't find any GPU!" << std::endl; + return false; + } + if(device_id >= num_devices) + { + std::cerr << "ERROR: the requested device_id is not found!" 
<< std::endl; + return false; + } + CHECK_HIP(hipSetDevice(device_id)); + CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop, device_id)); + + std::cout << "Using GPU device " << device_id << ": " << hip_dev_prop.name << "[" + << hip_dev_prop.gcnArchName << "] on PCI bus " << std::setfill('0') + << std::setw(2) << std::right << std::hex << hip_dev_prop.pciBusID + << ":" << std::setfill('0') << std::setw(2) << std::right << std::hex + << hip_dev_prop.pciDomainID << "." << hip_dev_prop.pciDeviceID + << std::dec << std::endl; + + return true; + } + + /** + * @brief Gets the chroma subsampling string. + * + * This function gets the chroma subsampling string based on the specified subsampling + * value. + * + * @param subsampling The chroma subsampling value. + * @param chroma_sub_sampling The string to store the chroma subsampling. + */ + void GetChromaSubsamplingStr(RocJpegChromaSubsampling subsampling, + std::string& chroma_sub_sampling) + { + switch(subsampling) + { + case ROCJPEG_CSS_444: chroma_sub_sampling = "YUV 4:4:4"; break; + case ROCJPEG_CSS_440: chroma_sub_sampling = "YUV 4:4:0"; break; + case ROCJPEG_CSS_422: chroma_sub_sampling = "YUV 4:2:2"; break; + case ROCJPEG_CSS_420: chroma_sub_sampling = "YUV 4:2:0"; break; + case ROCJPEG_CSS_411: chroma_sub_sampling = "YUV 4:1:1"; break; + case ROCJPEG_CSS_400: chroma_sub_sampling = "YUV 4:0:0"; break; + case ROCJPEG_CSS_UNKNOWN: chroma_sub_sampling = "UNKNOWN"; break; + default: chroma_sub_sampling = ""; break; + } + } + + /** + * @brief Gets the channel pitch and sizes. + * + * This function gets the channel pitch and sizes based on the specified output + * format, chroma subsampling, output image, and channel sizes. + * + * @param decode_params The decode parameters that specify the output format and crop + * rectangle. + * @param subsampling The chroma subsampling. + * @param widths The array to store the channel widths. + * @param heights The array to store the channel heights. 
+ * @param num_channels The number of channels. + * @param output_image The output image. + * @param channel_sizes The array to store the channel sizes. + * @return The channel pitch. + */ + int GetChannelPitchAndSizes(RocJpegDecodeParams decode_params, + RocJpegChromaSubsampling subsampling, uint32_t* widths, + uint32_t* heights, uint32_t& num_channels, + RocJpegImage& output_image, uint32_t* channel_sizes) + { + bool is_roi_valid = false; + uint32_t roi_width; + uint32_t roi_height; + roi_width = + decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; + roi_height = + decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; + if(roi_width > 0 && roi_height > 0 && roi_width <= widths[0] && + roi_height <= heights[0]) + { + is_roi_valid = true; + } + switch(decode_params.output_format) + { + case ROCJPEG_OUTPUT_NATIVE: + switch(subsampling) + { + case ROCJPEG_CSS_444: + num_channels = 3; + output_image.pitch[2] = output_image.pitch[1] = + output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; + channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = + align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + break; + case ROCJPEG_CSS_440: + num_channels = 3; + output_image.pitch[2] = output_image.pitch[1] = + output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; + channel_sizes[0] = + align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + channel_sizes[2] = channel_sizes[1] = + align(output_image.pitch[0] * + ((is_roi_valid ? roi_height : heights[0]) >> 1), + mem_alignment); + break; + case ROCJPEG_CSS_422: + num_channels = 1; + output_image.pitch[0] = + (is_roi_valid ? roi_width : widths[0]) * 2; + channel_sizes[0] = + align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + break; + case ROCJPEG_CSS_420: + num_channels = 2; + output_image.pitch[1] = output_image.pitch[0] = + is_roi_valid ? 
roi_width : widths[0]; + channel_sizes[0] = + align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + channel_sizes[1] = + align(output_image.pitch[1] * + ((is_roi_valid ? roi_height : heights[0]) >> 1), + mem_alignment); + break; + case ROCJPEG_CSS_400: + num_channels = 1; + output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; + channel_sizes[0] = + align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + break; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; + } + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + if(subsampling == ROCJPEG_CSS_400) + { + num_channels = 1; + output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; + channel_sizes[0] = align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + } + else + { + num_channels = 3; + output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; + output_image.pitch[1] = is_roi_valid ? roi_width : widths[1]; + output_image.pitch[2] = is_roi_valid ? roi_width : widths[2]; + channel_sizes[0] = align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + channel_sizes[1] = align(output_image.pitch[1] * + (is_roi_valid ? roi_height : heights[1]), + mem_alignment); + channel_sizes[2] = align(output_image.pitch[2] * + (is_roi_valid ? roi_height : heights[2]), + mem_alignment); + } + break; + case ROCJPEG_OUTPUT_Y: + num_channels = 1; + output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; + channel_sizes[0] = align(output_image.pitch[0] * + (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + break; + case ROCJPEG_OUTPUT_RGB: + num_channels = 1; + output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3; + channel_sizes[0] = align(output_image.pitch[0] * + (is_roi_valid ? 
roi_height : heights[0]), + mem_alignment); + break; + case ROCJPEG_OUTPUT_RGB_PLANAR: + num_channels = 3; + output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = + is_roi_valid ? roi_width : widths[0]; + channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align( + output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), + mem_alignment); + break; + default: + std::cout << "Unknown output format!" << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; + } + + /** + * @brief Gets the output file extension. + * + * This function gets the output file extension based on the specified output format, + * base file name, image width, image height, and file name for saving. + * + * @param output_format The output format. + * @param base_file_name The base file name. + * @param image_width The image width. + * @param image_height The image height. + * @param file_name_for_saving The string to store the file name for saving. + */ + void GetOutputFileExt(RocJpegOutputFormat output_format, std::string& base_file_name, + uint32_t image_width, uint32_t image_height, + RocJpegChromaSubsampling subsampling, + std::string& file_name_for_saving) + { + std::string file_extension; + std::string::size_type const p(base_file_name.find_last_of('.')); + std::string file_name_no_ext = base_file_name.substr(0, p); + std::string format_description = ""; + switch(output_format) + { + case ROCJPEG_OUTPUT_NATIVE: + file_extension = "yuv"; + switch(subsampling) + { + case ROCJPEG_CSS_444: format_description = "444"; break; + case ROCJPEG_CSS_440: format_description = "440"; break; + case ROCJPEG_CSS_422: format_description = "422_yuyv"; break; + case ROCJPEG_CSS_420: format_description = "nv12"; break; + case ROCJPEG_CSS_400: format_description = "400"; break; + default: + std::cout << "Unknown chroma subsampling!" 
<< std::endl; + return; + } + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + file_extension = "yuv"; + format_description = "planar"; + break; + case ROCJPEG_OUTPUT_Y: + file_extension = "yuv"; + format_description = "400"; + break; + case ROCJPEG_OUTPUT_RGB: + file_extension = "rgb"; + format_description = "packed"; + break; + case ROCJPEG_OUTPUT_RGB_PLANAR: + file_extension = "rgb"; + format_description = "planar"; + break; + default: file_extension = ""; break; + } + file_name_for_saving += "//" + file_name_no_ext + "_" + + std::to_string(image_width) + "x" + + std::to_string(image_height) + "_" + format_description + + "." + file_extension; + } + + /** + * @brief Saves the image. + * + * This function saves the image to the specified output file name based on the output + * image, image width, image height, chroma subsampling, and output format. + * + * @param output_file_name The output file name. + * @param output_image The output image. + * @param img_width The image width. + * @param img_height The image height. + * @param subsampling The chroma subsampling. + * @param output_format The output format. 
+ */ + void SaveImage(std::string output_file_name, RocJpegImage* output_image, + uint32_t img_width, uint32_t img_height, + RocJpegChromaSubsampling subsampling, + RocJpegOutputFormat output_format) + { + uint8_t* hst_ptr = nullptr; + FILE* fp; + hipError_t hip_status = hipSuccess; + + if(output_image == nullptr || output_image->channel[0] == nullptr || + output_image->pitch[0] == 0) + { + return; + } + + uint32_t widths[ROCJPEG_MAX_COMPONENT] = {}; + uint32_t heights[ROCJPEG_MAX_COMPONENT] = {}; + + switch(output_format) + { + case ROCJPEG_OUTPUT_NATIVE: + switch(subsampling) + { + case ROCJPEG_CSS_444: + widths[2] = widths[1] = widths[0] = img_width; + heights[2] = heights[1] = heights[0] = img_height; + break; + case ROCJPEG_CSS_440: + widths[2] = widths[1] = widths[0] = img_width; + heights[0] = img_height; + heights[2] = heights[1] = img_height >> 1; + break; + case ROCJPEG_CSS_422: + widths[0] = img_width * 2; + heights[0] = img_height; + break; + case ROCJPEG_CSS_420: + widths[1] = widths[0] = img_width; + heights[0] = img_height; + heights[1] = img_height >> 1; + break; + case ROCJPEG_CSS_400: + widths[0] = img_width; + heights[0] = img_height; + break; + default: + std::cout << "Unknown chroma subsampling!" 
<< std::endl; + return; + } + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + switch(subsampling) + { + case ROCJPEG_CSS_444: + widths[2] = widths[1] = widths[0] = img_width; + heights[2] = heights[1] = heights[0] = img_height; + break; + case ROCJPEG_CSS_440: + widths[2] = widths[1] = widths[0] = img_width; + heights[0] = img_height; + heights[2] = heights[1] = img_height >> 1; + break; + case ROCJPEG_CSS_422: + widths[0] = img_width; + widths[2] = widths[1] = widths[0] >> 1; + heights[2] = heights[1] = heights[0] = img_height; + break; + case ROCJPEG_CSS_420: + widths[0] = img_width; + widths[2] = widths[1] = widths[0] >> 1; + heights[0] = img_height; + heights[2] = heights[1] = img_height >> 1; + break; + case ROCJPEG_CSS_400: + widths[0] = img_width; + heights[0] = img_height; + break; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return; + } + break; + case ROCJPEG_OUTPUT_Y: + widths[0] = img_width; + heights[0] = img_height; + break; + case ROCJPEG_OUTPUT_RGB: + widths[0] = img_width * 3; + heights[0] = img_height; + break; + case ROCJPEG_OUTPUT_RGB_PLANAR: + widths[2] = widths[1] = widths[0] = img_width; + heights[2] = heights[1] = heights[0] = img_height; + break; + default: std::cout << "Unknown output format!" 
<< std::endl; return; + } + + uint32_t channel0_size = output_image->pitch[0] * heights[0]; + uint32_t channel1_size = output_image->pitch[1] * heights[1]; + uint32_t channel2_size = output_image->pitch[2] * heights[2]; + + uint32_t output_image_size = channel0_size + channel1_size + channel2_size; + + if(hst_ptr == nullptr) + { + hst_ptr = new uint8_t[output_image_size]; + } + + CHECK_HIP( + hipMemcpyDtoH((void*) hst_ptr, output_image->channel[0], channel0_size)); + + uint8_t* tmp_hst_ptr = hst_ptr; + fp = fopen(output_file_name.c_str(), "wb"); + if(fp) + { + // write channel0 + if(widths[0] == output_image->pitch[0]) + { + fwrite(hst_ptr, 1, channel0_size, fp); + } + else + { + for(int i = 0; i < heights[0]; i++) + { + fwrite(tmp_hst_ptr, 1, widths[0], fp); + tmp_hst_ptr += output_image->pitch[0]; + } + } + // write channel1 + if(channel1_size != 0 && output_image->channel[1] != nullptr) + { + uint8_t* channel1_hst_ptr = hst_ptr + channel0_size; + CHECK_HIP(hipMemcpyDtoH((void*) channel1_hst_ptr, + output_image->channel[1], channel1_size)); + if(widths[1] == output_image->pitch[1]) + { + fwrite(channel1_hst_ptr, 1, channel1_size, fp); + } + else + { + for(int i = 0; i < heights[1]; i++) + { + fwrite(channel1_hst_ptr, 1, widths[1], fp); + channel1_hst_ptr += output_image->pitch[1]; + } + } + } + // write channel2 + if(channel2_size != 0 && output_image->channel[2] != nullptr) + { + uint8_t* channel2_hst_ptr = hst_ptr + channel0_size + channel1_size; + CHECK_HIP(hipMemcpyDtoH((void*) channel2_hst_ptr, + output_image->channel[2], channel2_size)); + if(widths[2] == output_image->pitch[2]) + { + fwrite(channel2_hst_ptr, 1, channel2_size, fp); + } + else + { + for(int i = 0; i < heights[2]; i++) + { + fwrite(channel2_hst_ptr, 1, widths[2], fp); + channel2_hst_ptr += output_image->pitch[2]; + } + } + } + fclose(fp); + } + + if(hst_ptr != nullptr) + { + delete[] hst_ptr; + hst_ptr = nullptr; + tmp_hst_ptr = nullptr; + } + } + +private: + static const int mem_alignment = 
4 * 1024 * 1024; + /** + * @brief Shows the help message and exits. + * + * This function shows the help message and exits the program. + * + * @param option The option to display in the help message (optional). + * @param show_threads Flag indicating whether to show the number of threads in the + * help message. + */ + static void ShowHelpAndExit(const char* option = nullptr, bool show_threads = false, + bool show_batch_size = false) + { + std::cout << "Options:\n" + "-i [input path] - input path to a single JPEG image or a " + "directory containing JPEG images - [required]\n" + "-be [backend] - select rocJPEG backend (0 for " + "hardware-accelerated JPEG decoding using VCN,\n" + " 1 for hybrid JPEG " + "decoding using CPU and GPU HIP kernels (currently not supported)) " + "[optional - default: 0]\n" + "-fmt [output format] - select rocJPEG output format for " + "decoding, one of the [native, yuv_planar, y, rgb, rgb_planar] - " + "[optional - default: native]\n" + "-o [output path] - path to an output file or a path to an " + "existing directory - write decoded images to a file or an existing " + "directory based on selected output format - [optional]\n" + "-crop [crop rectangle] - crop rectangle for output in a " + "comma-separated format: left,top,right,bottom - [optional]\n" + "-d [device id] - specify the GPU device id for the desired " + "device (use 0 for the first device, 1 for the second device, and " + "so on) [optional - default: 0]\n"; + if(show_threads) + { + std::cout << "-t [threads] - number of threads (<= 32) for parallel JPEG " + "decoding - [optional - default: 1]\n"; + } + if(show_batch_size) + { + std::cout << "-b [batch_size] - decode images from input by batches of a " + "specified size - [optional - default: 1]\n"; + } + exit(0); + } + /** + * @brief Aligns a value to a specified alignment. + * + * This function takes a value and aligns it to the specified alignment. It returns + * the aligned value. 
+ * + * @param value The value to be aligned. + * @param alignment The alignment value. + * @return The aligned value. + */ + static inline int align(int value, int alignment) + { + return (value + alignment - 1) & ~(alignment - 1); + } +}; + +class ThreadPool +{ +public: + ThreadPool(int nthreads) + : shutdown_(false) + { + // Create the specified number of threads + threads_.reserve(nthreads); + for(int i = 0; i < nthreads; ++i) + threads_.emplace_back(std::bind(&ThreadPool::ThreadEntry, this, i)); + } + + ~ThreadPool() {} + + void JoinThreads() + { + { + // Unblock any threads and tell them to stop + std::unique_lock lock(mutex_); + shutdown_ = true; + cond_var_.notify_all(); + } + + // Wait for all threads to stop + for(auto& thread : threads_) + thread.join(); + } + + void ExecuteJob(std::function func) + { + // Place a job on the queue and unblock a thread + std::unique_lock lock(mutex_); + decode_jobs_queue_.emplace(std::move(func)); + cond_var_.notify_one(); + } + +protected: + void ThreadEntry(int i) + { + std::function execute_decode_job; + + while(true) + { + { + std::unique_lock lock(mutex_); + cond_var_.wait(lock, + [&] { return shutdown_ || !decode_jobs_queue_.empty(); }); + if(decode_jobs_queue_.empty()) + { + // No jobs to do; shutting down + return; + } + + execute_decode_job = std::move(decode_jobs_queue_.front()); + decode_jobs_queue_.pop(); + } + + // Execute the decode job without holding any locks + execute_decode_job(); + } + } + + std::mutex mutex_; + std::condition_variable cond_var_; + bool shutdown_; + std::queue> decode_jobs_queue_; + std::vector threads_; +}; +#endif // ROC_JPEG_SAMPLES_COMMON diff --git a/projects/rocprofiler-systems/examples/lulesh/CMakeLists.txt b/projects/rocprofiler-systems/examples/lulesh/CMakeLists.txt new file mode 100644 index 0000000000..8402b3685d --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/CMakeLists.txt @@ -0,0 +1,97 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + 
+project(rocprofiler-systems-lulesh-example LANGUAGES C CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") +string( + REGEX REPLACE + " -g(|[0-2]) " + " -g3 " + CMAKE_CXX_FLAGS_RELWITHDEBINFO + "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" +) + +list(INSERT CMAKE_MODULE_PATH 0 ${PROJECT_SOURCE_DIR}/cmake/Modules) + +option(LULESH_BUILD_KOKKOS "Build Kokkos from submodule" ON) +if(LULESH_BUILD_KOKKOS) + add_subdirectory(external) + if(LULESH_USE_CUDA) + kokkos_compilation(PROJECT COMPILER ${Kokkos_NVCC_WRAPPER}) + elseif(LULESH_USE_HIP AND NOT "${CMAKE_CXX_COMPILER}" MATCHES "hipcc") + if(NOT HIPCC_EXECUTABLE) + find_package(hip QUIET HINTS ${ROCmVersion_DIR} PATHS ${ROCmVersion_DIR}) + + find_program( + HIPCC_EXECUTABLE + NAMES hipcc + HINTS ${ROCmVersion_DIR} ${ROCM_PATH} + ENV ROCM_PATH + /opt/rocm + PATHS ${ROCmVersion_DIR} ${ROCM_PATH} + ENV ROCM_PATH + /opt/rocm + ) + mark_as_advanced(HIPCC_EXECUTABLE) + endif() + kokkos_compilation(PROJECT COMPILER ${HIPCC_EXECUTABLE}) + endif() +else() + find_package(Kokkos REQUIRED COMPONENTS separable_compilation) + kokkos_compilation(PROJECT) +endif() + +set(CMAKE_CXX_EXTENSIONS OFF) + +if("${CMAKE_BUILD_TYPE}" STREQUAL "") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type" FORCE) +endif() + +option(LULESH_USE_MPI "Enable MPI" OFF) +add_library(lulesh-mpi INTERFACE) +if(LULESH_USE_MPI) + find_package(MPI REQUIRED) + target_compile_definitions(lulesh-mpi INTERFACE USE_MPI=1) + target_link_libraries(lulesh-mpi INTERFACE MPI::MPI_C MPI::MPI_CXX) +else() + target_compile_definitions(lulesh-mpi INTERFACE USE_MPI=0) +endif() + +if(NOT TARGET Kokkos::kokkos) + find_package(Kokkos REQUIRED) +endif() + +file(GLOB headers ${PROJECT_SOURCE_DIR}/*.h ${PROJECT_SOURCE_DIR}/*.hxx) +file(GLOB sources 
${PROJECT_SOURCE_DIR}/*.cc) + +rocprofiler_systems_causal_example_executable( + "lulesh" + SOURCES ${sources} ${headers} + LINK_LIBRARIES Kokkos::kokkos lulesh-mpi + INCLUDE_DIRECTORIES ${PROJECT_SOURCE_DIR}/includes +) + +if(ROCPROFSYS_INSTALL_EXAMPLES) + if(LULESH_BUILD_KOKKOS) + install( + TARGETS kokkoscore kokkoscontainers + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT rocprofiler-systems-examples + ) + set_target_properties( + lulesh + PROPERTIES INSTALL_RPATH "\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" + ) + endif() +endif() diff --git a/projects/rocprofiler-systems/examples/lulesh/cmake/Modules/Utilities.cmake b/projects/rocprofiler-systems/examples/lulesh/cmake/Modules/Utilities.cmake new file mode 100644 index 0000000000..c6f0df15c1 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/cmake/Modules/Utilities.cmake @@ -0,0 +1,169 @@ +# include guard +include_guard(DIRECTORY) + +# MacroUtilities - useful macros and functions for generic tasks +# + +include(CMakeDependentOption) +include(CMakeParseArguments) + +# ----------------------------------------------------------------------- +# function - capitalize - make a string capitalized (first letter is capital) +# +function(CAPITALIZE str var) + # make string lower + string(TOLOWER "${str}" str) + string(SUBSTRING "${str}" 0 1 _first) + string(TOUPPER "${_first}" _first) + string(SUBSTRING "${str}" 1 -1 _remainder) + string(CONCAT str "${_first}" "${_remainder}") + set(${var} "${str}" PARENT_SCOPE) +endfunction() + +# ----------------------------------------------------------------------------------------# +# function CHECKOUT_GIT_SUBMODULE() +# +# Run "git submodule update" if a file in a submodule does not exist +# +# ARGS: RECURSIVE (option) -- add "--recursive" flag RELATIVE_PATH (one value) -- +# typically the relative path to submodule from PROJECT_SOURCE_DIR WORKING_DIRECTORY (one +# value) -- (default: PROJECT_SOURCE_DIR) TEST_FILE (one value) -- file to check for +# (default: 
CMakeLists.txt) ADDITIONAL_CMDS (many value) -- any addition commands to pass +# +function(CHECKOUT_GIT_SUBMODULE) + # parse args + cmake_parse_arguments( + CHECKOUT + "RECURSIVE" + "RELATIVE_PATH;WORKING_DIRECTORY;TEST_FILE;REPO_URL;REPO_BRANCH" + "ADDITIONAL_CMDS" + ${ARGN} + ) + + if(NOT CHECKOUT_WORKING_DIRECTORY) + set(CHECKOUT_WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + endif() + + if(NOT CHECKOUT_TEST_FILE) + set(CHECKOUT_TEST_FILE "CMakeLists.txt") + endif() + + # default assumption + if(NOT CHECKOUT_REPO_BRANCH) + set(CHECKOUT_REPO_BRANCH "master") + endif() + + find_package(Git) + set(_DIR "${CHECKOUT_WORKING_DIRECTORY}/${CHECKOUT_RELATIVE_PATH}") + # ensure the (possibly empty) directory exists + if(NOT EXISTS "${_DIR}") + if(NOT CHECKOUT_REPO_URL) + message(FATAL_ERROR "submodule directory does not exist") + endif() + endif() + + # if this file exists --> project has been checked out if not exists --> not been + # checked out + set(_TEST_FILE "${_DIR}/${CHECKOUT_TEST_FILE}") + # assuming a .gitmodules file exists + set(_SUBMODULE "${PROJECT_SOURCE_DIR}/.gitmodules") + + set(_TEST_FILE_EXISTS OFF) + if(EXISTS "${_TEST_FILE}" AND NOT IS_DIRECTORY "${_TEST_FILE}") + set(_TEST_FILE_EXISTS ON) + endif() + + if(_TEST_FILE_EXISTS) + return() + endif() + + find_package(Git REQUIRED) + + set(_SUBMODULE_EXISTS OFF) + if(EXISTS "${_SUBMODULE}" AND NOT IS_DIRECTORY "${_SUBMODULE}") + set(_SUBMODULE_EXISTS ON) + else() + set(_SUBMODULE "${CMAKE_SOURCE_DIR}/.gitmodules") + if(EXISTS "${_SUBMODULE}" AND NOT IS_DIRECTORY "${_SUBMODULE}") + set(_SUBMODULE_EXISTS ON) + endif() + endif() + + set(_HAS_REPO_URL OFF) + if(NOT "${CHECKOUT_REPO_URL}" STREQUAL "") + set(_HAS_REPO_URL ON) + endif() + + # if the module has not been checked out + if(NOT _TEST_FILE_EXISTS AND _SUBMODULE_EXISTS) + # perform the checkout + execute_process( + COMMAND + ${GIT_EXECUTABLE} submodule update --init ${_RECURSE} + ${CHECKOUT_ADDITIONAL_CMDS} ${CHECKOUT_RELATIVE_PATH} + WORKING_DIRECTORY 
${CHECKOUT_WORKING_DIRECTORY} + RESULT_VARIABLE RET + ) + + # check the return code + if(RET GREATER 0) + set(_CMD + "${GIT_EXECUTABLE} submodule update --init ${_RECURSE} + ${CHECKOUT_ADDITIONAL_CMDS} ${CHECKOUT_RELATIVE_PATH}" + ) + message(STATUS "function(CHECKOUT_GIT_SUBMODULE) failed.") + message(FATAL_ERROR "Command: \"${_CMD}\"") + else() + set(_TEST_FILE_EXISTS ON) + endif() + endif() + + if(NOT _TEST_FILE_EXISTS AND _HAS_REPO_URL) + message( + STATUS + "Checking out '${CHECKOUT_REPO_URL}' @ '${CHECKOUT_REPO_BRANCH}'..." + ) + + # remove the existing directory + if(EXISTS "${_DIR}") + execute_process(COMMAND ${CMAKE_COMMAND} -E remove_directory ${_DIR}) + endif() + + # perform the checkout + execute_process( + COMMAND + ${GIT_EXECUTABLE} clone -b ${CHECKOUT_REPO_BRANCH} + ${CHECKOUT_ADDITIONAL_CMDS} ${CHECKOUT_REPO_URL} ${CHECKOUT_RELATIVE_PATH} + WORKING_DIRECTORY ${CHECKOUT_WORKING_DIRECTORY} + RESULT_VARIABLE RET + ) + + # perform the submodule update + if(CHECKOUT_RECURSIVE AND EXISTS "${_DIR}" AND IS_DIRECTORY "${_DIR}") + execute_process( + COMMAND ${GIT_EXECUTABLE} submodule update --init ${_RECURSE} + WORKING_DIRECTORY ${_DIR} + RESULT_VARIABLE RET + ) + endif() + + # check the return code + if(RET GREATER 0) + set(_CMD + "${GIT_EXECUTABLE} clone -b ${CHECKOUT_REPO_BRANCH} + ${CHECKOUT_ADDITIONAL_CMDS} ${CHECKOUT_REPO_URL} ${CHECKOUT_RELATIVE_PATH}" + ) + message(STATUS "function(CHECKOUT_GIT_SUBMODULE) failed.") + message(FATAL_ERROR "Command: \"${_CMD}\"") + else() + set(_TEST_FILE_EXISTS ON) + endif() + endif() + + if(NOT EXISTS "${_TEST_FILE}" OR NOT _TEST_FILE_EXISTS) + message( + FATAL_ERROR + "Error checking out submodule: '${CHECKOUT_RELATIVE_PATH}' to '${_DIR}'" + ) + endif() +endfunction() diff --git a/projects/rocprofiler-systems/examples/lulesh/external/CMakeLists.txt b/projects/rocprofiler-systems/examples/lulesh/external/CMakeLists.txt new file mode 100644 index 0000000000..06d250922a --- /dev/null +++ 
b/projects/rocprofiler-systems/examples/lulesh/external/CMakeLists.txt @@ -0,0 +1,33 @@ +include(Utilities) + +option(LULESH_USE_CUDA "Enable Kokkos CUDA backend for lulesh" OFF) +option(LULESH_USE_HIP "Enable Kokkos HIP backend for lulesh" OFF) + +set(Kokkos_ENABLE_SERIAL + ON + CACHE BOOL "Enable Serial") + +if(LULESH_USE_CUDA) + set(Kokkos_ENABLE_CUDA + ON + CACHE BOOL "Enable CUDA" FORCE) + set(Kokkos_ENABLE_CUDA_LAMBDA + ON + CACHE BOOL "Enable CUDA lambda support" FORCE) +elseif(LULESH_USE_HIP) + set(Kokkos_ENABLE_HIP + ON + CACHE BOOL "Enable HIP" FORCE) +else() + set(Kokkos_ENABLE_OPENMP + ON + CACHE BOOL "Enable OpenMP") +endif() + +checkout_git_submodule( + RELATIVE_PATH external/kokkos WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} REPO_URL + https://github.com/kokkos/kokkos.git REPO_BRANCH develop) + +set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY ON) + +add_subdirectory(kokkos EXCLUDE_FROM_ALL) diff --git a/projects/rocprofiler-systems/examples/lulesh/external/kokkos b/projects/rocprofiler-systems/examples/lulesh/external/kokkos new file mode 160000 index 0000000000..1a0c2ff6da --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/external/kokkos @@ -0,0 +1 @@ +Subproject commit 1a0c2ff6daf1068c65529ec04c2c046177847869 diff --git a/projects/rocprofiler-systems/examples/lulesh/includes/Timer.hxx b/projects/rocprofiler-systems/examples/lulesh/includes/Timer.hxx new file mode 100644 index 0000000000..55902c3632 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/includes/Timer.hxx @@ -0,0 +1,127 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief RAJA header file for simple class that can be used to + * time code sections. 
+ * + * \author Rich Hornung, Center for Applied Scientific Computing, LLNL + * \author Jeff Keasler, Applications, Simulations And Quality, LLNL + * + ****************************************************************************** + */ + +#ifndef RAJA_Timer_HXX +#define RAJA_Timer_HXX + +#if defined(RAJA_USE_CYCLE) +# include "./cycle.h" +typedef ticks TimeType; + +#elif defined(RAJA_USE_CLOCK) +# include +typedef clock_t TimeType; + +#elif defined(RAJA_USE_GETTIME) +# include +typedef timespec TimeType; + +#else +# error RAJA_TIMER_TYPE is undefined! + +#endif + +namespace RAJA +{ +/*! + ****************************************************************************** + * + * \brief Simple timer class to time code sections. + * + ****************************************************************************** + */ +class Timer +{ +public: +#if defined(RAJA_USE_CYCLE) || defined(RAJA_USE_CLOCK) + Timer() + : telapsed(0) + { + ; + } +#endif +#if defined(RAJA_USE_GETTIME) + Timer() + : telapsed(0) + , stime_elapsed(0) + , nstime_elapsed(0) + { + ; + } +#endif + +#if defined(RAJA_USE_CYCLE) + void start() { tstart = getticks(); } + void stop() + { + tstop = getticks(); + set_elapsed(); + } + + long double elapsed() { return static_cast(telapsed); } +#endif + +#if defined(RAJA_USE_CLOCK) + void start() { tstart = clock(); } + void stop() + { + tstop = clock(); + set_elapsed(); + } + + long double elapsed() { return static_cast(telapsed) / CLOCKS_PER_SEC; } +#endif + +#if defined(RAJA_USE_GETTIME) + +# if 0 + void start() { clock_gettime(CLOCK_REALTIME, &tstart); } + void stop() { clock_gettime(CLOCK_REALTIME, &tstop); set_elapsed(); } +# else + void start() { clock_gettime(CLOCK_MONOTONIC, &tstart); } + void stop() + { + clock_gettime(CLOCK_MONOTONIC, &tstop); + set_elapsed(); + } +# endif + + long double elapsed() { return (stime_elapsed + nstime_elapsed); } + +#endif + +private: + TimeType tstart; + TimeType tstop; + long double telapsed; + +#if defined(RAJA_USE_CYCLE) || 
defined(RAJA_USE_CLOCK) + void set_elapsed() { telapsed += (tstop - tstart); } + +#elif defined(RAJA_USE_GETTIME) + long double stime_elapsed; + long double nstime_elapsed; + + void set_elapsed() + { + stime_elapsed += static_cast(tstop.tv_sec - tstart.tv_sec); + nstime_elapsed += + static_cast(tstop.tv_nsec - tstart.tv_nsec) / 1000000000.0; + } +#endif +}; + +} // namespace RAJA + +#endif // closing endif for header file include guard diff --git a/projects/rocprofiler-systems/examples/lulesh/includes/cycle.h b/projects/rocprofiler-systems/examples/lulesh/includes/cycle.h new file mode 100644 index 0000000000..fc90d38afe --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/includes/cycle.h @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2003, 2007-8 Matteo Frigo + * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* machine-dependent cycle counters code. 
Needs to be inlined. */ + +/***************************************************************************/ +/* To use the cycle counters in your code, simply #include "cycle.h" (this + file), and then use the functions/macros: + + ticks getticks(void); + + ticks is an opaque typedef defined below, representing the current time. + You extract the elapsed time between two calls to gettick() via: + + double elapsed(ticks t1, ticks t0); + + which returns a double-precision variable in arbitrary units. You + are not expected to convert this into human units like seconds; it + is intended only for *comparisons* of time intervals. + + (In order to use some of the OS-dependent timer routines like + Solaris' gethrtime, you need to paste the autoconf snippet below + into your configure.ac file and #include "config.h" before cycle.h, + or define the relevant macros manually if you are not using autoconf.) +*/ + +/***************************************************************************/ +/* This file uses macros like HAVE_GETHRTIME that are assumed to be + defined according to whether the corresponding function/type/header + is available on your system. 
The necessary macros are most + conveniently defined if you are using GNU autoconf, via the tests: + + dnl --------------------------------------------------------------------- + + AC_C_INLINE + AC_HEADER_TIME + AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) + + AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is +defined in ])],,[#if HAVE_SYS_TIME_H #include #endif]) + + AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime +mach_absolute_time]) + + dnl Cray UNICOS _rtc() (real-time clock) intrinsic + AC_MSG_CHECKING([for _rtc intrinsic]) + rtc_ok=yes + AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H +#include +#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() +intrinsic.])], [rtc_ok=no]) AC_MSG_RESULT($rtc_ok) + + dnl --------------------------------------------------------------------- +*/ + +/***************************************************************************/ + +#if TIME_WITH_SYS_TIME +# include +# include +#else +# if HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif + +#define INLINE_ELAPSED(INL) \ + static INL double elapsed(ticks t1, ticks t0) { return (double) t1 - (double) t0; } + +/*----------------------------------------------------------------*/ +/* Solaris */ +#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER) +typedef hrtime_t ticks; + +# define getticks gethrtime + +INLINE_ELAPSED(inline) + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* AIX v. 
4+ routines to read the real-time clock or time-base register */ +#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && \ + !defined(HAVE_TICK_COUNTER) +typedef timebasestruct_t ticks; + +static __inline ticks +getticks(void) +{ + ticks t; + read_real_time(&t, TIMEBASE_SZ); + return t; +} + +static __inline double +elapsed(ticks t1, ticks t0) /* time in nanoseconds */ +{ + time_base_to_time(&t1, TIMEBASE_SZ); + time_base_to_time(&t0, TIMEBASE_SZ); + return (((double) t1.tb_high - (double) t0.tb_high) * 1.0e9 + + ((double) t1.tb_low - (double) t0.tb_low)); +} + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PowerPC ``cycle'' counter using the time base register. + */ +#if((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || \ + (defined(__MWERKS__) && defined(macintosh)))) || \ + (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && \ + !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks +getticks(void) +{ + unsigned int tbl, tbu0, tbu1; + + do + { + __asm__ __volatile__("mftbu %0" : "=r"(tbu0)); + __asm__ __volatile__("mftb %0" : "=r"(tbl)); + __asm__ __volatile__("mftbu %0" : "=r"(tbu1)); + } while(tbu0 != tbu1); + + return (((unsigned long long) tbu0) << 32) | tbl; +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif + +/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime, + from Carbon, requires no additional libraries to be linked). 
*/ +#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && \ + !defined(HAVE_TICK_COUNTER) +# include +typedef uint64_t ticks; +# define getticks mach_absolute_time +INLINE_ELAPSED(__inline__) +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * Pentium cycle counter + */ +#if(defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && \ + !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks +getticks(void) +{ + ticks ret; + + __asm__ __volatile__("rdtsc" : "=A"(ret)); + /* no input, nothing else clobbered */ + return ret; +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +# define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ +#endif + +/* Visual C++ -- thanks to Morten Nissov for his help with this */ +#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER) +# include +typedef LARGE_INTEGER ticks; +# define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */ + +static __inline ticks +getticks(void) +{ + ticks retval; + + __asm { + RDTSC + mov retval.HighPart, edx + mov retval.LowPart, eax + } + return retval; +} + +static __inline double +elapsed(ticks t1, ticks t0) +{ + return (double) t1.QuadPart - (double) t0.QuadPart; +} + +# define HAVE_TICK_COUNTER +# define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ +#endif + +/*----------------------------------------------------------------*/ +/* + * X86-64 cycle counter + */ +#if(defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && \ + defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks +getticks(void) +{ + unsigned a, d; + __asm__ volatile("rdtsc" : "=a"(a), "=d"(d)); + return ((ticks) a) | (((ticks) d) << 32); +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif + +/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori. 
+ NOTE: this code will fail to link unless you use the -Masmkeyword compiler + option (grrr). */ +#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +static ticks +getticks(void) +{ + asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; "); +} +INLINE_ELAPSED(__inline__) +# define HAVE_TICK_COUNTER +#endif + +/* Visual C++, courtesy of Dirk Michaelis */ +#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && \ + !defined(HAVE_TICK_COUNTER) + +# include +# pragma intrinsic(__rdtsc) +typedef unsigned __int64 ticks; +# define getticks __rdtsc +INLINE_ELAPSED(__inline) + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * IA64 cycle counter + */ + +/* intel's icc/ecc compiler */ +#if(defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && \ + !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; +# include + +static __inline__ ticks +getticks(void) +{ + return __getReg(_IA64_REG_AR_ITC); +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif + +/* gcc */ +#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks +getticks(void) +{ + ticks ret; + + __asm__ __volatile__("mov %0=ar.itc" : "=r"(ret)); + return ret; +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif + +/* HP/UX IA64 compiler, courtesy Teresa L. 
Johnson: */ +#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER) +# include +typedef unsigned long ticks; + +static inline ticks +getticks(void) +{ + ticks ret; + + ret = _Asm_mov_from_ar(_AREG_ITC); + return ret; +} + +INLINE_ELAPSED(inline) + +# define HAVE_TICK_COUNTER +#endif + +/* Microsoft Visual C++ */ +#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER) +typedef unsigned __int64 ticks; + +# ifdef __cplusplus +extern "C" +# endif + ticks + __getReg(int whichReg); +# pragma intrinsic(__getReg) + +static __inline ticks +getticks(void) +{ + volatile ticks temp; + temp = __getReg(3116); + return temp; +} + +INLINE_ELAPSED(inline) + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PA-RISC cycle counter: control register 16, read with mfctl (GCC) or _MFCTL (other compilers). The || below is parenthesized because && binds tighter; without it __hppa__ alone would enable this section even when a tick counter was already selected. + */ +#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +# ifdef __GNUC__ +static __inline__ ticks +getticks(void) +{ + ticks ret; + + __asm__ __volatile__("mfctl 16, %0" : "=r"(ret)); + /* no input, nothing else clobbered */ + return ret; +} +# else +# include +static inline unsigned long +getticks(void) +{ + register ticks ret; + _MFCTL(16, ret); + return ret; +} +# endif + +INLINE_ELAPSED(inline) + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* S390, courtesy of James Treacy */ +#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks +getticks(void) +{ + ticks cycles; + __asm__("stck 0(%0)" : : "a"(&(cycles)) : "memory", "cc"); + return cycles; +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER) +/* + * The 32-bit cycle counter on alpha overflows pretty quickly, + * unfortunately. 
A 1GHz machine overflows in 4 seconds. + */ +typedef unsigned int ticks; + +static __inline__ ticks +getticks(void) +{ + unsigned long cc; + __asm__ __volatile__("rpcc %0" : "=r"(cc)); + return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks +getticks(void) +{ + ticks ret; + __asm__ __volatile__("rd %%tick, %0" : "=r"(ret)); + return ret; +} + +INLINE_ELAPSED(__inline__) + +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if(defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && \ + defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER) +# include +typedef unsigned int ticks; + +static __inline ticks +getticks(void) +{ + unsigned long cc; + cc = asm("rpcc %v0"); + return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline) + +# define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +/* SGI/Irix */ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) +typedef struct timespec ticks; + +static inline ticks +getticks(void) +{ + struct timespec t; + clock_gettime(CLOCK_SGI_CYCLE, &t); + return t; +} + +static inline double +elapsed(ticks t1, ticks t0) +{ + return ((double) t1.tv_sec - (double) t0.tv_sec) * 1.0E9 + + ((double) t1.tv_nsec - (double) t0.tv_nsec); +} +# define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* Cray UNICOS _rtc() intrinsic function */ +#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER) +# ifdef HAVE_INTRINSICS_H +# include +# endif + +typedef long long ticks; + +# define getticks _rtc + +INLINE_ELAPSED(inline) + +# define HAVE_TICK_COUNTER +#endif + 
+/*----------------------------------------------------------------*/ +/* MIPS ZBus: getticks() returns the 64-bit value read through a read-only mapping of /dev/mem at offset 0x10030000; it returns 0 if the mapping cannot be set up. */ +#if HAVE_MIPS_ZBUS_TIMER +# if defined(__mips__) && !defined(HAVE_TICK_COUNTER) +# include +# include +# include + +typedef uint64_t ticks; + +static inline ticks +getticks(void) +{ + static uint64_t* addr = 0; /* cached mapping; stays 0 until mmap succeeds */ + + if(addr == 0) + { + uint32_t rq_addr = 0x10030000; + int fd; + int pgsize = getpagesize(); + + fd = open("/dev/mem", O_RDONLY | O_SYNC, 0); + if(fd < 0) + { + perror("open"); + return 0; /* ticks is an integer type, so report failure as 0 rather than NULL */ + } + addr = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr); + close(fd); /* the descriptor is not used again after mmap */ + if(addr == MAP_FAILED) + { + perror("mmap"); + addr = 0; /* drop the failure value so a later call retries instead of dereferencing it */ + return 0; + } + } + + return *addr; +} + +INLINE_ELAPSED(inline) + +# define HAVE_TICK_COUNTER +# endif +#endif /* HAVE_MIPS_ZBUS_TIMER */ diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh-comm.cc b/projects/rocprofiler-systems/examples/lulesh/lulesh-comm.cc new file mode 100644 index 0000000000..90a225d164 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh-comm.cc @@ -0,0 +1,2068 @@ +#include "lulesh.h" + +// If no MPI, then this whole file is stubbed out +#if USE_MPI + +# include +# include + +/* Comm Routines */ + +# define ALLOW_UNPACKED_PLANE false +# define ALLOW_UNPACKED_ROW false +# define ALLOW_UNPACKED_COL false + +/* + There are coherence issues for packing and unpacking message + buffers. Ideally, you would like a lot of threads to + cooperate in the assembly/dissassembly of each message. + To do that, each thread should really be operating in a + different coherence zone. + + Let's assume we have three fields, f1 through f3, defined on + a 61x61x61 cube. 
If we want to send the block boundary + information for each field to each neighbor processor across + each cube face, then we have three cases for the + memory layout/coherence of data on each of the six cube + boundaries: + + (a) Two of the faces will be in contiguous memory blocks + (b) Two of the faces will be comprised of pencils of + contiguous memory. + (c) Two of the faces will have large strides between + every value living on the face. + + How do you pack and unpack this data in buffers to + simultaneous achieve the best memory efficiency and + the most thread independence? + + Do do you pack field f1 through f3 tighly to reduce message + size? Do you align each field on a cache coherence boundary + within the message so that threads can pack and unpack each + field independently? For case (b), do you align each + boundary pencil of each field separately? This increases + the message size, but could improve cache coherence so + each pencil could be processed independently by a separate + thread with no conflicts. + + Also, memory access for case (c) would best be done without + going through the cache (the stride is so large it just causes + a lot of useless cache evictions). Is it worth creating + a special case version of the packing algorithm that uses + non-coherent load/store opcodes? +*/ + +/******************************************/ + +/* doRecv flag only works with regular block structure */ +void +CommRecv(Domain& domain, int msgType, Index_t xferFields, Index_t dx, Index_t dy, + Index_t dz, bool doRecv, bool planeOnly) +{ + if(domain.numRanks() == 1) return; + + /* post recieve buffers for all incoming messages */ + int myRank; + Index_t maxPlaneComm = xferFields * domain.maxPlaneSize(); + Index_t maxEdgeComm = xferFields * domain.maxEdgeSize(); + Index_t pmsg = 0; /* plane comm msg */ + Index_t emsg = 0; /* edge comm msg */ + Index_t cmsg = 0; /* corner comm msg */ + MPI_Datatype baseType = ((sizeof(Real_t) == 4) ? 
MPI_FLOAT : MPI_DOUBLE); + bool rowMin, rowMax, colMin, colMax, planeMin, planeMax; + + /* assume communication to 6 neighbors by default */ + rowMin = rowMax = colMin = colMax = planeMin = planeMax = true; + + if(domain.rowLoc() == 0) + { + rowMin = false; + } + if(domain.rowLoc() == (domain.tp() - 1)) + { + rowMax = false; + } + if(domain.colLoc() == 0) + { + colMin = false; + } + if(domain.colLoc() == (domain.tp() - 1)) + { + colMax = false; + } + if(domain.planeLoc() == 0) + { + planeMin = false; + } + if(domain.planeLoc() == (domain.tp() - 1)) + { + planeMax = false; + } + + for(Index_t i = 0; i < 26; ++i) + { + domain.recvRequest[i] = MPI_REQUEST_NULL; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + + /* post receives */ + + /* receive data from neighboring domain faces */ + if(planeMin && doRecv) + { + /* contiguous memory */ + int fromRank = myRank - domain.tp() * domain.tp(); + int recvCount = dx * dy * xferFields; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm], recvCount, baseType, + fromRank, msgType, MPI_COMM_WORLD, &domain.recvRequest[pmsg]); + ++pmsg; + } + if(planeMax) + { + /* contiguous memory */ + int fromRank = myRank + domain.tp() * domain.tp(); + int recvCount = dx * dy * xferFields; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm], recvCount, baseType, + fromRank, msgType, MPI_COMM_WORLD, &domain.recvRequest[pmsg]); + ++pmsg; + } + if(rowMin && doRecv) + { + /* semi-contiguous memory */ + int fromRank = myRank - domain.tp(); + int recvCount = dx * dz * xferFields; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm], recvCount, baseType, + fromRank, msgType, MPI_COMM_WORLD, &domain.recvRequest[pmsg]); + ++pmsg; + } + if(rowMax) + { + /* semi-contiguous memory */ + int fromRank = myRank + domain.tp(); + int recvCount = dx * dz * xferFields; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm], recvCount, baseType, + fromRank, msgType, MPI_COMM_WORLD, &domain.recvRequest[pmsg]); + ++pmsg; + } + if(colMin && doRecv) + { + /* 
scattered memory */ + int fromRank = myRank - 1; + int recvCount = dy * dz * xferFields; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm], recvCount, baseType, + fromRank, msgType, MPI_COMM_WORLD, &domain.recvRequest[pmsg]); + ++pmsg; + } + if(colMax) + { + /* scattered memory */ + int fromRank = myRank + 1; + int recvCount = dy * dz * xferFields; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm], recvCount, baseType, + fromRank, msgType, MPI_COMM_WORLD, &domain.recvRequest[pmsg]); + ++pmsg; + } + + if(!planeOnly) + { + /* receive data from domains connected only by an edge */ + if(rowMin && colMin && doRecv) + { + int fromRank = myRank - domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dz * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMin && planeMin && doRecv) + { + int fromRank = myRank - domain.tp() * domain.tp() - domain.tp(); + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dx * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(colMin && planeMin && doRecv) + { + int fromRank = myRank - domain.tp() * domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dy * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && colMax) + { + int fromRank = myRank + domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dz * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && planeMax) + { + int fromRank = myRank + domain.tp() * domain.tp() + domain.tp(); + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dx * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; 
+ } + + if(colMax && planeMax) + { + int fromRank = myRank + domain.tp() * domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dy * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && colMin) + { + int fromRank = myRank + domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dz * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMin && planeMax) + { + int fromRank = myRank + domain.tp() * domain.tp() - domain.tp(); + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dx * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(colMin && planeMax) + { + int fromRank = myRank + domain.tp() * domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dy * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMin && colMax && doRecv) + { + int fromRank = myRank - domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dz * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && planeMin && doRecv) + { + int fromRank = myRank - domain.tp() * domain.tp() + domain.tp(); + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dx * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + if(colMax && planeMin && doRecv) + { + int fromRank = myRank - domain.tp() * domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm], + dy * xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg]); + ++emsg; + } + + /* receive data from 
domains connected only by a corner */ + if(rowMin && colMin && planeMin && doRecv) + { + /* corner at domain logical coord (0, 0, 0) */ + int fromRank = myRank - domain.tp() * domain.tp() - domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMin && colMin && planeMax) + { + /* corner at domain logical coord (0, 0, 1) */ + int fromRank = myRank + domain.tp() * domain.tp() - domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMin && colMax && planeMin && doRecv) + { + /* corner at domain logical coord (1, 0, 0) */ + int fromRank = myRank - domain.tp() * domain.tp() - domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMin && colMax && planeMax) + { + /* corner at domain logical coord (1, 0, 1) */ + int fromRank = myRank + domain.tp() * domain.tp() - domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMin && planeMin && doRecv) + { + /* corner at domain logical coord (0, 1, 0) */ + int fromRank = myRank - domain.tp() * domain.tp() + domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + 
++cmsg; + } + if(rowMax && colMin && planeMax) + { + /* corner at domain logical coord (0, 1, 1) */ + int fromRank = myRank + domain.tp() * domain.tp() + domain.tp() - 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMax && planeMin && doRecv) + { + /* corner at domain logical coord (1, 1, 0) */ + int fromRank = myRank - domain.tp() * domain.tp() + domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMax && planeMax) + { + /* corner at domain logical coord (1, 1, 1) */ + int fromRank = myRank + domain.tp() * domain.tp() + domain.tp() + 1; + MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL], + xferFields, baseType, fromRank, msgType, MPI_COMM_WORLD, + &domain.recvRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + } +} + +/******************************************/ + +void +CommSend(Domain& domain, int msgType, Index_t xferFields, Domain_member* fieldData, + Index_t dx, Index_t dy, Index_t dz, bool doSend, bool planeOnly) +{ + if(domain.numRanks() == 1) return; + + /* post recieve buffers for all incoming messages */ + int myRank; + Index_t maxPlaneComm = xferFields * domain.maxPlaneSize(); + Index_t maxEdgeComm = xferFields * domain.maxEdgeSize(); + Index_t pmsg = 0; /* plane comm msg */ + Index_t emsg = 0; /* edge comm msg */ + Index_t cmsg = 0; /* corner comm msg */ + MPI_Datatype baseType = ((sizeof(Real_t) == 4) ? 
MPI_FLOAT : MPI_DOUBLE); + MPI_Status status[26]; + Real_t* destAddr; + bool rowMin, rowMax, colMin, colMax, planeMin, planeMax; + /* assume communication to 6 neighbors by default */ + rowMin = rowMax = colMin = colMax = planeMin = planeMax = true; + if(domain.rowLoc() == 0) + { + rowMin = false; + } + if(domain.rowLoc() == (domain.tp() - 1)) + { + rowMax = false; + } + if(domain.colLoc() == 0) + { + colMin = false; + } + if(domain.colLoc() == (domain.tp() - 1)) + { + colMax = false; + } + if(domain.planeLoc() == 0) + { + planeMin = false; + } + if(domain.planeLoc() == (domain.tp() - 1)) + { + planeMax = false; + } + + for(Index_t i = 0; i < 26; ++i) + { + domain.sendRequest[i] = MPI_REQUEST_NULL; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + + /* post sends */ + + if(planeMin | planeMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + int sendCount = dx * dy; + + if(planeMin) + { + destAddr = &domain.commDataSend[pmsg * maxPlaneComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < sendCount; ++i) + { + destAddr[i] = (domain.*src)(i); + } + destAddr += sendCount; + } + destAddr -= xferFields * sendCount; + + MPI_Isend(destAddr, xferFields * sendCount, baseType, + myRank - domain.tp() * domain.tp(), msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg]); + ++pmsg; + } + if(planeMax && doSend) + { + destAddr = &domain.commDataSend[pmsg * maxPlaneComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < sendCount; ++i) + { + destAddr[i] = (domain.*src)(dx * dy * (dz - 1) + i); + } + destAddr += sendCount; + } + destAddr -= xferFields * sendCount; + + MPI_Isend(destAddr, xferFields * sendCount, baseType, + myRank + domain.tp() * domain.tp(), msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg]); + ++pmsg; + } + } + if(rowMin | rowMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + int sendCount = dx * 
dz; + + if(rowMin) + { + destAddr = &domain.commDataSend[pmsg * maxPlaneComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dx; ++j) + { + destAddr[i * dx + j] = (domain.*src)(i * dx * dy + j); + } + } + destAddr += sendCount; + } + destAddr -= xferFields * sendCount; + + MPI_Isend(destAddr, xferFields * sendCount, baseType, myRank - domain.tp(), + msgType, MPI_COMM_WORLD, &domain.sendRequest[pmsg]); + ++pmsg; + } + if(rowMax && doSend) + { + destAddr = &domain.commDataSend[pmsg * maxPlaneComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dx; ++j) + { + destAddr[i * dx + j] = + (domain.*src)(dx * (dy - 1) + i * dx * dy + j); + } + } + destAddr += sendCount; + } + destAddr -= xferFields * sendCount; + + MPI_Isend(destAddr, xferFields * sendCount, baseType, myRank + domain.tp(), + msgType, MPI_COMM_WORLD, &domain.sendRequest[pmsg]); + ++pmsg; + } + } + if(colMin | colMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + int sendCount = dy * dz; + + if(colMin) + { + destAddr = &domain.commDataSend[pmsg * maxPlaneComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dy; ++j) + { + destAddr[i * dy + j] = (domain.*src)(i * dx * dy + j * dx); + } + } + destAddr += sendCount; + } + destAddr -= xferFields * sendCount; + + MPI_Isend(destAddr, xferFields * sendCount, baseType, myRank - 1, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg]); + ++pmsg; + } + if(colMax && doSend) + { + destAddr = &domain.commDataSend[pmsg * maxPlaneComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dy; ++j) + { + destAddr[i * dy + j] = + (domain.*src)(dx - 1 + i * 
dx * dy + j * dx); + } + } + destAddr += sendCount; + } + destAddr -= xferFields * sendCount; + + MPI_Isend(destAddr, xferFields * sendCount, baseType, myRank + 1, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg]); + ++pmsg; + } + } + + if(!planeOnly) + { + if(rowMin && colMin) + { + int toRank = myRank - domain.tp() - 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + destAddr[i] = (domain.*src)(i * dx * dy); + } + destAddr += dz; + } + destAddr -= xferFields * dz; + MPI_Isend(destAddr, xferFields * dz, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMin && planeMin) + { + int toRank = myRank - domain.tp() * domain.tp() - domain.tp(); + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + destAddr[i] = (domain.*src)(i); + } + destAddr += dx; + } + destAddr -= xferFields * dx; + MPI_Isend(destAddr, xferFields * dx, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(colMin && planeMin) + { + int toRank = myRank - domain.tp() * domain.tp() - 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + destAddr[i] = (domain.*src)(i * dx); + } + destAddr += dy; + } + destAddr -= xferFields * dy; + MPI_Isend(destAddr, xferFields * dy, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && colMax && doSend) + { + int toRank = myRank + domain.tp() + 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; 
++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + destAddr[i] = (domain.*src)(dx * dy - 1 + i * dx * dy); + } + destAddr += dz; + } + destAddr -= xferFields * dz; + MPI_Isend(destAddr, xferFields * dz, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && planeMax && doSend) + { + int toRank = myRank + domain.tp() * domain.tp() + domain.tp(); + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + destAddr[i] = (domain.*src)(dx * (dy - 1) + dx * dy * (dz - 1) + i); + } + destAddr += dx; + } + destAddr -= xferFields * dx; + MPI_Isend(destAddr, xferFields * dx, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(colMax && planeMax && doSend) + { + int toRank = myRank + domain.tp() * domain.tp() + 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + destAddr[i] = (domain.*src)(dx * dy * (dz - 1) + dx - 1 + i * dx); + } + destAddr += dy; + } + destAddr -= xferFields * dy; + MPI_Isend(destAddr, xferFields * dy, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && colMin && doSend) + { + int toRank = myRank + domain.tp() - 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + destAddr[i] = (domain.*src)(dx * (dy - 1) + i * dx * dy); + } + destAddr += dz; + } + destAddr -= xferFields * dz; + MPI_Isend(destAddr, xferFields * dz, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + 
if(rowMin && planeMax && doSend) + { + int toRank = myRank + domain.tp() * domain.tp() - domain.tp(); + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + destAddr[i] = (domain.*src)(dx * dy * (dz - 1) + i); + } + destAddr += dx; + } + destAddr -= xferFields * dx; + MPI_Isend(destAddr, xferFields * dx, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(colMin && planeMax && doSend) + { + int toRank = myRank + domain.tp() * domain.tp() - 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + destAddr[i] = (domain.*src)(dx * dy * (dz - 1) + i * dx); + } + destAddr += dy; + } + destAddr -= xferFields * dy; + MPI_Isend(destAddr, xferFields * dy, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMin && colMax) + { + int toRank = myRank - domain.tp() + 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + destAddr[i] = (domain.*src)(dx - 1 + i * dx * dy); + } + destAddr += dz; + } + destAddr -= xferFields * dz; + MPI_Isend(destAddr, xferFields * dz, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMax && planeMin) + { + int toRank = myRank - domain.tp() * domain.tp() + domain.tp(); + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + destAddr[i] = (domain.*src)(dx * (dy - 1) + i); + } + destAddr += dx; + } + destAddr -= 
xferFields * dx; + MPI_Isend(destAddr, xferFields * dx, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(colMax && planeMin) + { + int toRank = myRank - domain.tp() * domain.tp() + 1; + destAddr = &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member src = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + destAddr[i] = (domain.*src)(dx - 1 + i * dx); + } + destAddr += dy; + } + destAddr -= xferFields * dy; + MPI_Isend(destAddr, xferFields * dy, baseType, toRank, msgType, + MPI_COMM_WORLD, &domain.sendRequest[pmsg + emsg]); + ++emsg; + } + + if(rowMin && colMin && planeMin) + { + /* corner at domain logical coord (0, 0, 0) */ + int toRank = myRank - domain.tp() * domain.tp() - domain.tp() - 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(0); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMin && colMin && planeMax && doSend) + { + /* corner at domain logical coord (0, 0, 1) */ + int toRank = myRank + domain.tp() * domain.tp() - domain.tp() - 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMin && colMax && planeMin) + { + /* corner at domain logical coord (1, 0, 0) */ + int toRank = myRank - domain.tp() * domain.tp() - domain.tp() + 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * 
CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx - 1; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMin && colMax && planeMax && doSend) + { + /* corner at domain logical coord (1, 0, 1) */ + int toRank = myRank + domain.tp() * domain.tp() - domain.tp() + 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1) + (dx - 1); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMin && planeMin) + { + /* corner at domain logical coord (0, 1, 0) */ + int toRank = myRank - domain.tp() * domain.tp() + domain.tp() - 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * (dy - 1); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMin && planeMax && doSend) + { + /* corner at domain logical coord (0, 1, 1) */ + int toRank = myRank + domain.tp() * domain.tp() + domain.tp() - 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1) + dx * (dy - 1); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMax && planeMin) + { + 
/* corner at domain logical coord (1, 1, 0) */ + int toRank = myRank - domain.tp() * domain.tp() + domain.tp() + 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy - 1; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + if(rowMax && colMax && planeMax && doSend) + { + /* corner at domain logical coord (1, 1, 1) */ + int toRank = myRank + domain.tp() * domain.tp() + domain.tp() + 1; + Real_t* comBuf = + &domain.commDataSend[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * dz - 1; + for(Index_t fi = 0; fi < xferFields; ++fi) + { + comBuf[fi] = (domain.*fieldData[fi])(idx); + } + MPI_Isend(comBuf, xferFields, baseType, toRank, msgType, MPI_COMM_WORLD, + &domain.sendRequest[pmsg + emsg + cmsg]); + ++cmsg; + } + } + + MPI_Waitall(26, domain.sendRequest, status); +} + +/******************************************/ + +void +CommSBN(Domain& domain, int xferFields, Domain_member* fieldData) +{ + if(domain.numRanks() == 1) return; + + /* summation order should be from smallest value to largest */ + /* or we could try out kahan summation! 
*/ + + int myRank; + Index_t maxPlaneComm = xferFields * domain.maxPlaneSize(); + Index_t maxEdgeComm = xferFields * domain.maxEdgeSize(); + Index_t pmsg = 0; /* plane comm msg */ + Index_t emsg = 0; /* edge comm msg */ + Index_t cmsg = 0; /* corner comm msg */ + Index_t dx = domain.sizeX() + 1; + Index_t dy = domain.sizeY() + 1; + Index_t dz = domain.sizeZ() + 1; + MPI_Status status; + Real_t* srcAddr; + Index_t rowMin, rowMax, colMin, colMax, planeMin, planeMax; + /* assume communication to 6 neighbors by default */ + rowMin = rowMax = colMin = colMax = planeMin = planeMax = 1; + if(domain.rowLoc() == 0) + { + rowMin = 0; + } + if(domain.rowLoc() == (domain.tp() - 1)) + { + rowMax = 0; + } + if(domain.colLoc() == 0) + { + colMin = 0; + } + if(domain.colLoc() == (domain.tp() - 1)) + { + colMax = 0; + } + if(domain.planeLoc() == 0) + { + planeMin = 0; + } + if(domain.planeLoc() == (domain.tp() - 1)) + { + planeMax = 0; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + + if(planeMin | planeMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dx * dy; + + if(planeMin) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(i) += srcAddr[i]; + } + srcAddr += opCount; + } + ++pmsg; + } + if(planeMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + i) += srcAddr[i]; + } + srcAddr += opCount; + } + ++pmsg; + } + } + + if(rowMin | rowMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dx * dz; + + if(rowMin) + { + /* 
contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dx; ++j) + { + (domain.*dest)(i * dx * dy + j) += srcAddr[i * dx + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + if(rowMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dx; ++j) + { + (domain.*dest)(dx * (dy - 1) + i * dx * dy + j) += + srcAddr[i * dx + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + } + if(colMin | colMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dy * dz; + + if(colMin) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dy; ++j) + { + (domain.*dest)(i * dx * dy + j * dx) += srcAddr[i * dy + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + if(colMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dy; ++j) + { + (domain.*dest)(dx - 1 + i * dx * dy + j * dx) += + srcAddr[i * dy + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + } + + if(rowMin & colMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < 
xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(i * dx * dy) += srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMin & planeMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(i) += srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMin & planeMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(i * dx) += srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMax & colMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(dx * dy - 1 + i * dx * dy) += srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMax & planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(dx * (dy - 1) + dx * dy * (dz - 1) + i) += srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMax & planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(dx * dy * 
(dz - 1) + dx - 1 + i * dx) += srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMax & colMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(dx * (dy - 1) + i * dx * dy) += srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMin & planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + i) += srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMin & planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + i * dx) += srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMin & colMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(dx - 1 + i * dx * dy) += srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMax & planeMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(dx * (dy - 1) + i) += srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMax 
& planeMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(dx - 1 + i * dx) += srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMin & colMin & planeMin) + { + /* corner at domain logical coord (0, 0, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(0) += comBuf[fi]; + } + ++cmsg; + } + if(rowMin & colMin & planeMax) + { + /* corner at domain logical coord (0, 0, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } + if(rowMin & colMax & planeMin) + { + /* corner at domain logical coord (1, 0, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx - 1; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } + if(rowMin & colMax & planeMax) + { + /* corner at domain logical coord (1, 0, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1) + (dx - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } + if(rowMax 
& colMin & planeMin) + { + /* corner at domain logical coord (0, 1, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * (dy - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } + if(rowMax & colMin & planeMax) + { + /* corner at domain logical coord (0, 1, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1) + dx * (dy - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } + if(rowMax & colMax & planeMin) + { + /* corner at domain logical coord (1, 1, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy - 1; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } + if(rowMax & colMax & planeMax) + { + /* corner at domain logical coord (1, 1, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * dz - 1; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) += comBuf[fi]; + } + ++cmsg; + } +} + +/******************************************/ + +void +CommSyncPosVel(Domain& domain) +{ + if(domain.numRanks() == 1) return; + + int myRank; + bool doRecv = false; + Index_t xferFields = 6; /* x, y, z, xd, yd, zd */ + Domain_member fieldData[6]; + Index_t maxPlaneComm = xferFields * domain.maxPlaneSize(); + Index_t maxEdgeComm = 
xferFields * domain.maxEdgeSize(); + Index_t pmsg = 0; /* plane comm msg */ + Index_t emsg = 0; /* edge comm msg */ + Index_t cmsg = 0; /* corner comm msg */ + Index_t dx = domain.sizeX() + 1; + Index_t dy = domain.sizeY() + 1; + Index_t dz = domain.sizeZ() + 1; + MPI_Status status; + Real_t* srcAddr; + bool rowMin, rowMax, colMin, colMax, planeMin, planeMax; + + /* assume communication to 6 neighbors by default */ + rowMin = rowMax = colMin = colMax = planeMin = planeMax = true; + if(domain.rowLoc() == 0) + { + rowMin = false; + } + if(domain.rowLoc() == (domain.tp() - 1)) + { + rowMax = false; + } + if(domain.colLoc() == 0) + { + colMin = false; + } + if(domain.colLoc() == (domain.tp() - 1)) + { + colMax = false; + } + if(domain.planeLoc() == 0) + { + planeMin = false; + } + if(domain.planeLoc() == (domain.tp() - 1)) + { + planeMax = false; + } + + fieldData[0] = &Domain::x; + fieldData[1] = &Domain::y; + fieldData[2] = &Domain::z; + fieldData[3] = &Domain::xd; + fieldData[4] = &Domain::yd; + fieldData[5] = &Domain::zd; + + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + + if(planeMin | planeMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dx * dy; + + if(planeMin && doRecv) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(i) = srcAddr[i]; + } + srcAddr += opCount; + } + ++pmsg; + } + if(planeMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + i) = srcAddr[i]; + } + srcAddr += opCount; + } + ++pmsg; + } + } + + if(rowMin | rowMax) + { + /* ASSUMING 
ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dx * dz; + + if(rowMin && doRecv) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dx; ++j) + { + (domain.*dest)(i * dx * dy + j) = srcAddr[i * dx + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + if(rowMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dx; ++j) + { + (domain.*dest)(dx * (dy - 1) + i * dx * dy + j) = + srcAddr[i * dx + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + } + + if(colMin | colMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dy * dz; + + if(colMin && doRecv) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dy; ++j) + { + (domain.*dest)(i * dx * dy + j * dx) = srcAddr[i * dy + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + if(colMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + for(Index_t j = 0; j < dy; ++j) + { + (domain.*dest)(dx - 1 + i * dx * dy + j * dx) = + srcAddr[i * dy + j]; + } + } + srcAddr += opCount; + } + ++pmsg; + } + } + + if(rowMin && colMin && doRecv) + { + srcAddr = 
&domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(i * dx * dy) = srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMin && planeMin && doRecv) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(i) = srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMin && planeMin && doRecv) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(i * dx) = srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMax && colMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(dx * dy - 1 + i * dx * dy) = srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMax && planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(dx * (dy - 1) + dx * dy * (dz - 1) + i) = srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMax && planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + 
emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + dx - 1 + i * dx) = srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMax && colMin) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(dx * (dy - 1) + i * dx * dy) = srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMin && planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + i) = srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMin && planeMax) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(dx * dy * (dz - 1) + i * dx) = srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMin && colMax && doRecv) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dz; ++i) + { + (domain.*dest)(dx - 1 + i * dx * dy) = srcAddr[i]; + } + srcAddr += dz; + } + ++emsg; + } + + if(rowMax && planeMin && doRecv) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; 
++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dx; ++i) + { + (domain.*dest)(dx * (dy - 1) + i) = srcAddr[i]; + } + srcAddr += dx; + } + ++emsg; + } + + if(colMax && planeMin && doRecv) + { + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm]; + MPI_Wait(&domain.recvRequest[pmsg + emsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < dy; ++i) + { + (domain.*dest)(dx - 1 + i * dx) = srcAddr[i]; + } + srcAddr += dy; + } + ++emsg; + } + + if(rowMin && colMin && planeMin && doRecv) + { + /* corner at domain logical coord (0, 0, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(0) = comBuf[fi]; + } + ++cmsg; + } + if(rowMin && colMin && planeMax) + { + /* corner at domain logical coord (0, 0, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } + if(rowMin && colMax && planeMin && doRecv) + { + /* corner at domain logical coord (1, 0, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx - 1; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } + if(rowMin && colMax && planeMax) + { + /* corner at domain logical coord (1, 0, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t 
idx = dx * dy * (dz - 1) + (dx - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } + if(rowMax && colMin && planeMin && doRecv) + { + /* corner at domain logical coord (0, 1, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * (dy - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } + if(rowMax && colMin && planeMax) + { + /* corner at domain logical coord (0, 1, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * (dz - 1) + dx * (dy - 1); + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } + if(rowMax && colMax && planeMin && doRecv) + { + /* corner at domain logical coord (1, 1, 0) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy - 1; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } + if(rowMax && colMax && planeMax) + { + /* corner at domain logical coord (1, 1, 1) */ + Real_t* comBuf = &domain.commDataRecv[pmsg * maxPlaneComm + emsg * maxEdgeComm + + cmsg * CACHE_COHERENCE_PAD_REAL]; + Index_t idx = dx * dy * dz - 1; + MPI_Wait(&domain.recvRequest[pmsg + emsg + cmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + (domain.*fieldData[fi])(idx) = comBuf[fi]; + } + ++cmsg; + } +} + +/******************************************/ + +void +CommMonoQ(Domain& domain) +{ + 
if(domain.numRanks() == 1) return; + + int myRank; + Index_t xferFields = 3; /* delv_xi, delv_eta, delv_zeta */ + Domain_member fieldData[3]; + Index_t fieldOffset[3]; + Index_t maxPlaneComm = xferFields * domain.maxPlaneSize(); + Index_t pmsg = 0; /* plane comm msg */ + Index_t dx = domain.sizeX(); + Index_t dy = domain.sizeY(); + Index_t dz = domain.sizeZ(); + MPI_Status status; + Real_t* srcAddr; + bool rowMin, rowMax, colMin, colMax, planeMin, planeMax; + /* assume communication to 6 neighbors by default */ + rowMin = rowMax = colMin = colMax = planeMin = planeMax = true; + if(domain.rowLoc() == 0) + { + rowMin = false; + } + if(domain.rowLoc() == (domain.tp() - 1)) + { + rowMax = false; + } + if(domain.colLoc() == 0) + { + colMin = false; + } + if(domain.colLoc() == (domain.tp() - 1)) + { + colMax = false; + } + if(domain.planeLoc() == 0) + { + planeMin = false; + } + if(domain.planeLoc() == (domain.tp() - 1)) + { + planeMax = false; + } + + /* point into ghost data area */ + // fieldData[0] = &(domain.delv_xi(domain.numElem())) ; + // fieldData[1] = &(domain.delv_eta(domain.numElem())) ; + // fieldData[2] = &(domain.delv_zeta(domain.numElem())) ; + fieldData[0] = &Domain::delv_xi; + fieldData[1] = &Domain::delv_eta; + fieldData[2] = &Domain::delv_zeta; + fieldOffset[0] = domain.numElem(); + fieldOffset[1] = domain.numElem(); + fieldOffset[2] = domain.numElem(); + + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + + if(planeMin | planeMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dx * dy; + + if(planeMin) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i]; + } + srcAddr += opCount; + fieldOffset[fi] += opCount; + } + ++pmsg; + } + if(planeMax) + { + /* contiguous 
memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i]; + } + srcAddr += opCount; + fieldOffset[fi] += opCount; + } + ++pmsg; + } + } + + if(rowMin | rowMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dx * dz; + + if(rowMin) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i]; + } + srcAddr += opCount; + fieldOffset[fi] += opCount; + } + ++pmsg; + } + if(rowMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i]; + } + srcAddr += opCount; + fieldOffset[fi] += opCount; + } + ++pmsg; + } + } + if(colMin | colMax) + { + /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */ + Index_t opCount = dy * dz; + + if(colMin) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i]; + } + srcAddr += opCount; + fieldOffset[fi] += opCount; + } + ++pmsg; + } + if(colMax) + { + /* contiguous memory */ + srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm]; + MPI_Wait(&domain.recvRequest[pmsg], &status); + for(Index_t fi = 0; fi < 
xferFields; ++fi) + { + Domain_member dest = fieldData[fi]; + for(Index_t i = 0; i < opCount; ++i) + { + (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i]; + } + srcAddr += opCount; + } + ++pmsg; + } + } +} + +#endif diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh-init.cc b/projects/rocprofiler-systems/examples/lulesh/lulesh-init.cc new file mode 100644 index 0000000000..55d149e8b2 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh-init.cc @@ -0,0 +1,931 @@ +#include +#if USE_MPI +# include +#endif +#include "lulesh.h" +#include +#include +#include +#include +#include + +static KOKKOS_INLINE_FUNCTION Real_t +CalcElemVolume(const Real_t x0, const Real_t x1, const Real_t x2, const Real_t x3, + const Real_t x4, const Real_t x5, const Real_t x6, const Real_t x7, + const Real_t y0, const Real_t y1, const Real_t y2, const Real_t y3, + const Real_t y4, const Real_t y5, const Real_t y6, const Real_t y7, + const Real_t z0, const Real_t z1, const Real_t z2, const Real_t z3, + const Real_t z4, const Real_t z5, const Real_t z6, const Real_t z7) +{ + Real_t twelveth = Real_t(1.0) / Real_t(12.0); + + Real_t dx61 = x6 - x1; + Real_t dy61 = y6 - y1; + Real_t dz61 = z6 - z1; + + Real_t dx70 = x7 - x0; + Real_t dy70 = y7 - y0; + Real_t dz70 = z7 - z0; + + Real_t dx63 = x6 - x3; + Real_t dy63 = y6 - y3; + Real_t dz63 = z6 - z3; + + Real_t dx20 = x2 - x0; + Real_t dy20 = y2 - y0; + Real_t dz20 = z2 - z0; + + Real_t dx50 = x5 - x0; + Real_t dy50 = y5 - y0; + Real_t dz50 = z5 - z0; + + Real_t dx64 = x6 - x4; + Real_t dy64 = y6 - y4; + Real_t dz64 = z6 - z4; + + Real_t dx31 = x3 - x1; + Real_t dy31 = y3 - y1; + Real_t dz31 = z3 - z1; + + Real_t dx72 = x7 - x2; + Real_t dy72 = y7 - y2; + Real_t dz72 = z7 - z2; + + Real_t dx43 = x4 - x3; + Real_t dy43 = y4 - y3; + Real_t dz43 = z4 - z3; + + Real_t dx57 = x5 - x7; + Real_t dy57 = y5 - y7; + Real_t dz57 = z5 - z7; + + Real_t dx14 = x1 - x4; + Real_t dy14 = y1 - y4; + Real_t dz14 = z1 - z4; + + Real_t dx25 
= x2 - x5; + Real_t dy25 = y2 - y5; + Real_t dz25 = z2 - z5; + +#define TRIPLE_PRODUCT(x1, y1, z1, x2, y2, z2, x3, y3, z3) \ + ((x1) * ((y2) * (z3) - (z2) * (y3)) + (x2) * ((z1) * (y3) - (y1) * (z3)) + \ + (x3) * ((y1) * (z2) - (z1) * (y2))) + + Real_t volume = TRIPLE_PRODUCT(dx31 + dx72, dx63, dx20, dy31 + dy72, dy63, dy20, + dz31 + dz72, dz63, dz20) + + TRIPLE_PRODUCT(dx43 + dx57, dx64, dx70, dy43 + dy57, dy64, dy70, + dz43 + dz57, dz64, dz70) + + TRIPLE_PRODUCT(dx14 + dx25, dx61, dx50, dy14 + dy25, dy61, dy50, + dz14 + dz25, dz61, dz50); + +#undef TRIPLE_PRODUCT + + volume *= twelveth; + + return volume; +} + +/******************************************/ + +KOKKOS_INLINE_FUNCTION +Real_t +CalcElemVolume(const Real_t x[8], const Real_t y[8], const Real_t z[8]) +{ + return CalcElemVolume(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], y[0], y[1], + y[2], y[3], y[4], y[5], y[6], y[7], z[0], z[1], z[2], z[3], + z[4], z[5], z[6], z[7]); +} + +///////////////////////////////////////////////////////////////////// +Domain::Domain(Int_t numRanks, Index_t colLoc, Index_t rowLoc, Index_t planeLoc, + Index_t nx, int tp, int nr, int balance, Int_t cost) +: m_e_cut(Real_t(1.0e-7)) +, m_p_cut(Real_t(1.0e-7)) +, m_q_cut(Real_t(1.0e-7)) +, m_v_cut(Real_t(1.0e-10)) +, m_u_cut(Real_t(1.0e-7)) +, m_hgcoef(Real_t(3.0)) +, m_ss4o3(Real_t(4.0) / Real_t(3.0)) +, m_qstop(Real_t(1.0e+12)) +, m_monoq_max_slope(Real_t(1.0)) +, m_monoq_limiter_mult(Real_t(2.0)) +, m_qlc_monoq(Real_t(0.5)) +, m_qqc_monoq(Real_t(2.0) / Real_t(3.0)) +, m_qqc(Real_t(2.0)) +, m_eosvmax(Real_t(1.0e+9)) +, m_eosvmin(Real_t(1.0e-9)) +, m_pmin(Real_t(0.)) +, m_emin(Real_t(-1.0e+15)) +, m_dvovmax(Real_t(0.1)) +, m_refdens(Real_t(1.0)) +, +// +// set pointers to (potentially) "new'd" arrays to null to +// simplify deallocation. 
+// +m_regNumList(0) +// m_nodeElemStart(0), +// m_nodeElemCornerList(0), +// m_regElemSize(0), +// m_regElemlist(0) +#if USE_MPI +, commDataSend(0) +, commDataRecv(0) +#endif +{ + Index_t edgeElems = nx; + Index_t edgeNodes = edgeElems + 1; + this->cost() = cost; + + m_tp = tp; + m_numRanks = numRanks; + + /////////////////////////////// + // Initialize Sedov Mesh + /////////////////////////////// + + // construct a uniform box for this processor + + m_colLoc = colLoc; + m_rowLoc = rowLoc; + m_planeLoc = planeLoc; + + m_sizeX = edgeElems; + m_sizeY = edgeElems; + m_sizeZ = edgeElems; + m_numElem = edgeElems * edgeElems * edgeElems; + + m_numNode = edgeNodes * edgeNodes * edgeNodes; + + m_regNumList = Allocate(numElem()); // material indexset + + // Elem-centered + AllocateElemPersistent(numElem()); + + // Node-centered + AllocateNodePersistent(numNode()); + + SetupCommBuffers(edgeNodes); + + // Basic Field Initialization + Kokkos::deep_copy(m_e, 0.0); + Kokkos::deep_copy(m_p, 0.0); + Kokkos::deep_copy(m_q, 0.0); + Kokkos::deep_copy(m_ss, 0.0); + + // Note - v initializes to 1.0, not 0.0! + Kokkos::deep_copy(m_v, 1.0); + + Kokkos::deep_copy(m_xd, 0.0); + Kokkos::deep_copy(m_yd, 0.0); + Kokkos::deep_copy(m_zd, 0.0); + + Kokkos::deep_copy(m_xdd, 0.0); + Kokkos::deep_copy(m_ydd, 0.0); + Kokkos::deep_copy(m_zdd, 0.0); + + Kokkos::deep_copy(m_nodalMass, 0.0); + + BuildMesh(nx, edgeNodes, edgeElems); + + SetupThreadSupportStructures(); + + // Setup region index sets. 
For now, these are constant sized + // throughout the run, but could be changed every cycle to + // simulate effects of ALE on the lagrange solver + CreateRegionIndexSets(nr, balance); + + // Setup symmetry nodesets + SetupSymmetryPlanes(edgeNodes); + + // Setup element connectivities + SetupElementConnectivities(edgeElems); + + // Setup symmetry planes and free surface boundary arrays + SetupBoundaryConditions(edgeElems); + + // Setup defaults + + // These can be changed (requires recompile) if you want to run + // with a fixed timestep, or to a different end time, but it's + // probably easier/better to just run a fixed number of timesteps + // using the -i flag in 2.x + + dtfixed() = Real_t(-1.0e-6); // Negative means use courant condition + stoptime() = Real_t(1.0e-2); // *Real_t(edgeElems*tp/45.0) ; + + // Initial conditions + deltatimemultlb() = Real_t(1.1); + deltatimemultub() = Real_t(1.2); + dtcourant() = Real_t(1.0e+20); + dthydro() = Real_t(1.0e+20); + dtmax() = Real_t(1.0e-2); + time() = Real_t(0.); + cycle() = Int_t(0); + + // With C++17 requirement we could just run this on the device + // without creating temporary host copies + auto h_nodelist = Kokkos::create_mirror_view(m_nodelist); + auto h_x = Kokkos::create_mirror_view(m_x); + auto h_y = Kokkos::create_mirror_view(m_y); + auto h_z = Kokkos::create_mirror_view(m_z); + auto h_volo = Kokkos::create_mirror_view(m_volo); + auto h_elemMass = Kokkos::create_mirror_view(m_elemMass); + auto h_nodalMass = Kokkos::create_mirror_view(m_nodalMass); + Kokkos::deep_copy(h_nodelist, m_nodelist); + Kokkos::deep_copy(h_x, m_x); + Kokkos::deep_copy(h_y, m_y); + Kokkos::deep_copy(h_z, m_z); + // initialize field data + for(Index_t i = 0; i < numElem(); ++i) + { + Real_t x_local[8], y_local[8], z_local[8]; + for(Index_t lnode = 0; lnode < 8; ++lnode) + { + Index_t gnode = h_nodelist(i, lnode); + x_local[lnode] = h_x(gnode); + y_local[lnode] = h_y(gnode); + z_local[lnode] = h_z(gnode); + } + + // volume calculations 
+ Real_t volume = CalcElemVolume(x_local, y_local, z_local); + h_volo(i) = volume; + h_elemMass(i) = volume; + for(Index_t j = 0; j < 8; ++j) + { + Index_t idx = h_nodelist(i, j); + h_nodalMass(idx) += volume / Real_t(8.0); + } + } + + Kokkos::deep_copy(m_volo, h_volo); + Kokkos::deep_copy(m_elemMass, h_elemMass); + Kokkos::deep_copy(m_nodalMass, h_nodalMass); + + // deposit initial energy + // An energy of 3.948746e+7 is correct for a problem with + // 45 zones along a side - we need to scale it + const Real_t ebase = Real_t(3.948746e+7); + Real_t scale = (nx * m_tp) / Real_t(45.0); + Real_t einit = ebase * scale * scale * scale; + if(m_rowLoc + m_colLoc + m_planeLoc == 0) + { + // Dump into the first zone (which we know is in the corner) + // of the domain that sits at the origin + Kokkos::deep_copy(Kokkos::subview(m_e, 0), einit); + // e(0) = einit; + } + // set initial deltatime base on analytic CFL calculation + deltatime() = (Real_t(.5) * cbrt(h_volo(0))) / sqrt(Real_t(2.0) * einit); + +} // End constructor + +//////////////////////////////////////////////////////////////////////////////// +Domain::~Domain() +{ + /* Release(&m_regNumList); + Release(&m_nodeElemStart); + Release(&m_nodeElemCornerList); + Release(&m_regElemSize); + for (Index_t i=0 ; i nodeElemCount("nodeElemCount", numNode()); + auto h_nodelist = Kokkos::create_mirror_view(m_nodelist); + Kokkos::deep_copy(h_nodelist, m_nodelist); + + for(Index_t i = 0; i < numElem(); ++i) + { + for(Index_t j = 0; j < 8; ++j) + { + ++(nodeElemCount[h_nodelist(i, j)]); + } + } + + m_nodeElemStart = Kokkos::View("m_nodeElemStart", numNode() + 1); + auto h_nodeElemStart = Kokkos::create_mirror_view(m_nodeElemStart); + + h_nodeElemStart[0] = 0; + + for(Index_t i = 1; i <= numNode(); ++i) + { + h_nodeElemStart[i] = h_nodeElemStart[i - 1] + nodeElemCount[i - 1]; + } + + m_nodeElemCornerList = + Kokkos::View("nodeElemCornerList", h_nodeElemStart[numNode()]); + auto h_nodeElemCornerList = 
Kokkos::create_mirror_view(m_nodeElemCornerList); + + for(Index_t i = 0; i < numNode(); ++i) + { + nodeElemCount[i] = 0; + } + + for(Index_t i = 0; i < numElem(); ++i) + { + for(Index_t j = 0; j < 8; ++j) + { + Index_t m = h_nodelist(i, j); + Index_t k = i * 8 + j; + Index_t offset = h_nodeElemStart[m] + nodeElemCount[m]; + h_nodeElemCornerList[offset] = k; + ++(nodeElemCount[m]); + } + } + + Index_t clSize = h_nodeElemStart[numNode()]; + for(Index_t i = 0; i < clSize; ++i) + { + Index_t clv = h_nodeElemCornerList[i]; + if((clv < 0) || (clv > numElem() * 8)) + { + fprintf( + stderr, + "AllocateNodeElemIndexes(): nodeElemCornerList entry out of range!\n"); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, -1); +#else + exit(-1); +#endif + } + } + Kokkos::deep_copy(m_nodeElemCornerList, h_nodeElemCornerList); + Kokkos::deep_copy(m_nodeElemStart, h_nodeElemStart); +} + +//////////////////////////////////////////////////////////////////////////////// +void +Domain::SetupCommBuffers(Int_t edgeNodes) +{ + // allocate a buffer large enough for nodal ghost data + Index_t maxEdgeSize = MAX(this->sizeX(), MAX(this->sizeY(), this->sizeZ())) + 1; + m_maxPlaneSize = CACHE_ALIGN_REAL(maxEdgeSize * maxEdgeSize); + m_maxEdgeSize = CACHE_ALIGN_REAL(maxEdgeSize); + + // assume communication to 6 neighbors by default + m_rowMin = (m_rowLoc == 0) ? 0 : 1; + m_rowMax = (m_rowLoc == m_tp - 1) ? 0 : 1; + m_colMin = (m_colLoc == 0) ? 0 : 1; + m_colMax = (m_colLoc == m_tp - 1) ? 0 : 1; + m_planeMin = (m_planeLoc == 0) ? 0 : 1; + m_planeMax = (m_planeLoc == m_tp - 1) ? 
0 : 1; + +#if USE_MPI + // account for face communication + Index_t comBufSize = + (m_rowMin + m_rowMax + m_colMin + m_colMax + m_planeMin + m_planeMax) * + m_maxPlaneSize * MAX_FIELDS_PER_MPI_COMM; + + // account for edge communication + comBufSize += + ((m_rowMin & m_colMin) + (m_rowMin & m_planeMin) + (m_colMin & m_planeMin) + + (m_rowMax & m_colMax) + (m_rowMax & m_planeMax) + (m_colMax & m_planeMax) + + (m_rowMax & m_colMin) + (m_rowMin & m_planeMax) + (m_colMin & m_planeMax) + + (m_rowMin & m_colMax) + (m_rowMax & m_planeMin) + (m_colMax & m_planeMin)) * + m_maxEdgeSize * MAX_FIELDS_PER_MPI_COMM; + + // account for corner communication + // factor of 16 is so each buffer has its own cache line + comBufSize += + ((m_rowMin & m_colMin & m_planeMin) + (m_rowMin & m_colMin & m_planeMax) + + (m_rowMin & m_colMax & m_planeMin) + (m_rowMin & m_colMax & m_planeMax) + + (m_rowMax & m_colMin & m_planeMin) + (m_rowMax & m_colMin & m_planeMax) + + (m_rowMax & m_colMax & m_planeMin) + (m_rowMax & m_colMax & m_planeMax)) * + CACHE_COHERENCE_PAD_REAL; + + this->commDataSend = Allocate(comBufSize); + this->commDataRecv = Allocate(comBufSize); + // prevent floating point exceptions + memset(this->commDataSend, 0, comBufSize * sizeof(Real_t)); + memset(this->commDataRecv, 0, comBufSize * sizeof(Real_t)); +#endif + + // Boundary nodesets + if(m_colLoc == 0) Kokkos::resize(m_symmX, edgeNodes * edgeNodes); + if(m_rowLoc == 0) Kokkos::resize(m_symmY, edgeNodes * edgeNodes); + if(m_planeLoc == 0) Kokkos::resize(m_symmZ, edgeNodes * edgeNodes); +} + +//////////////////////////////////////////////////////////////////////////////// +void +Domain::CreateRegionIndexSets(Int_t nr, Int_t balance) +{ +#if USE_MPI + Index_t myRank; + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + srand(myRank); +#else + srand(0); + Index_t myRank = 0; +#endif + this->numReg() = nr; + m_regElemSize = Allocate(numReg()); + auto row_map = Kokkos::View("regElemlist::row_map", numReg() + 1); + auto h_row_map = 
Kokkos::create_mirror_view(row_map); + auto entries = Kokkos::View("regElemlist::entries", numElem()); + m_regElemlist = t_regElemlist(entries, row_map); + auto h_regElemlist = typename t_regElemlist::HostMirror( + Kokkos::create_mirror_view(m_regElemlist.entries), h_row_map); + Index_t nextIndex = 0; + // if we only have one region just fill it + // Fill out the regNumList with material numbers, which are always + // the region index plus one + if(numReg() == 1) + { + while(nextIndex < numElem()) + { + this->regNumList(nextIndex) = 1; + nextIndex++; + } + regElemSize(0) = 0; + } + // If we have more than one region distribute the elements. + else + { + Int_t regionNum; + Int_t regionVar; + Int_t lastReg = -1; + Int_t binSize; + Index_t elements; + Index_t runto = 0; + Int_t costDenominator = 0; + Kokkos::View regBinEnd("regBinEnd", numReg()); + // Determine the relative weights of all the regions. This is based off the -b + // flag. Balance is the value passed into b. + for(Index_t i = 0; i < numReg(); ++i) + { + regElemSize(i) = 0; + costDenominator += pow((i + 1), balance); // Total sum of all regions weights + regBinEnd[i] = + costDenominator; // Chance of hitting a given region is (regBinEnd[i] - + // regBinEdn[i-1])/costDenominator + } + // Until all elements are assigned + while(nextIndex < numElem()) + { + // pick the region + regionVar = rand() % costDenominator; + Index_t i = 0; + while(regionVar >= regBinEnd[i]) + i++; + // rotate the regions based on MPI rank. Rotation is Rank % NumRegions this + // makes each domain have a different region with the highest representation + regionNum = ((i + myRank) % numReg()) + 1; + // make sure we don't pick the same region twice in a row + while(regionNum == lastReg) + { + regionVar = rand() % costDenominator; + i = 0; + while(regionVar >= regBinEnd[i]) + i++; + regionNum = ((i + myRank) % numReg()) + 1; + } + // Pick the bin size of the region and determine the number of elements. 
+ binSize = rand() % 1000; + if(binSize < 773) + { + elements = rand() % 15 + 1; + } + else if(binSize < 937) + { + elements = rand() % 16 + 16; + } + else if(binSize < 970) + { + elements = rand() % 32 + 32; + } + else if(binSize < 974) + { + elements = rand() % 64 + 64; + } + else if(binSize < 978) + { + elements = rand() % 128 + 128; + } + else if(binSize < 981) + { + elements = rand() % 256 + 256; + } + else + elements = rand() % 1537 + 512; + runto = elements + nextIndex; + // Store the elements. If we hit the end before we run out of elements then + // just stop. + while(nextIndex < runto && nextIndex < numElem()) + { + this->regNumList(nextIndex) = regionNum; + nextIndex++; + } + lastReg = regionNum; + } + } + // Convert regNumList to region index sets + // First, count size of each region + for(Index_t i = 0; i < numElem(); ++i) + { + int r = this->regNumList(i) - 1; // region index == regnum-1 + regElemSize(r)++; + } + // Second, allocate each region index set + for(Index_t i = 0; i < numReg(); ++i) + { + h_row_map(i + 1) = regElemSize(i); + regElemSize(i) = 0; + } + // Third, fill index sets + for(Index_t i = 0; i < numElem(); ++i) + { + Index_t r = regNumList(i) - 1; // region index == regnum-1 + Index_t regndx = regElemSize(r)++; // Note increment + h_regElemlist.entries(h_row_map(r) + regndx) = i; + } + Kokkos::deep_copy(m_regElemlist.entries, h_regElemlist.entries); + Kokkos::deep_copy(row_map, h_row_map); +} + +///////////////////////////////////////////////////////////// +void +Domain::SetupSymmetryPlanes(Int_t edgeNodes) +{ + Index_t nidx = 0; + auto h_symmZ = Kokkos::create_mirror_view(m_symmZ); + auto h_symmY = Kokkos::create_mirror_view(m_symmY); + auto h_symmX = Kokkos::create_mirror_view(m_symmX); + for(Index_t i = 0; i < edgeNodes; ++i) + { + Index_t planeInc = i * edgeNodes * edgeNodes; + Index_t rowInc = i * edgeNodes; + for(Index_t j = 0; j < edgeNodes; ++j) + { + if(m_planeLoc == 0) + { + h_symmZ[nidx] = rowInc + j; + } + if(m_rowLoc == 
0) + { + h_symmY[nidx] = planeInc + j; + } + if(m_colLoc == 0) + { + h_symmX[nidx] = planeInc + j * edgeNodes; + } + ++nidx; + } + } + Kokkos::deep_copy(m_symmZ, h_symmZ); + Kokkos::deep_copy(m_symmY, h_symmY); + Kokkos::deep_copy(m_symmX, h_symmX); +} + +///////////////////////////////////////////////////////////// +void +Domain::SetupElementConnectivities(Int_t edgeElems) +{ + // With C++17 we wouldn't need to do this and could run this on the GPU + // using class lambdas + auto h_lxim = Kokkos::create_mirror_view(m_lxim); + auto h_lxip = Kokkos::create_mirror_view(m_lxip); + h_lxim(0) = 0; + for(Index_t i = 1; i < numElem(); ++i) + { + h_lxim(i) = i - 1; + h_lxip(i - 1) = i; + } + h_lxip(numElem() - 1) = numElem() - 1; + Kokkos::deep_copy(m_lxim, h_lxim); + Kokkos::deep_copy(m_lxip, h_lxip); + + auto h_letam = Kokkos::create_mirror_view(m_letam); + auto h_letap = Kokkos::create_mirror_view(m_letap); + for(Index_t i = 0; i < edgeElems; ++i) + { + h_letam(i) = i; + h_letap(numElem() - edgeElems + i) = numElem() - edgeElems + i; + } + for(Index_t i = edgeElems; i < numElem(); ++i) + { + h_letam(i) = i - edgeElems; + h_letap(i - edgeElems) = i; + } + Kokkos::deep_copy(m_letam, h_letam); + Kokkos::deep_copy(m_letap, h_letap); + + auto h_lzetam = Kokkos::create_mirror_view(m_lzetam); + auto h_lzetap = Kokkos::create_mirror_view(m_lzetap); + for(Index_t i = 0; i < edgeElems * edgeElems; ++i) + { + h_lzetam(i) = i; + h_lzetap(numElem() - edgeElems * edgeElems + i) = + numElem() - edgeElems * edgeElems + i; + } + for(Index_t i = edgeElems * edgeElems; i < numElem(); ++i) + { + h_lzetam(i) = i - edgeElems * edgeElems; + h_lzetap(i - edgeElems * edgeElems) = i; + } + Kokkos::deep_copy(m_lzetam, h_lzetam); + Kokkos::deep_copy(m_lzetap, h_lzetap); +} + +///////////////////////////////////////////////////////////// +void +Domain::SetupBoundaryConditions(Int_t edgeElems) +{ + Index_t ghostIdx[6]; // offsets to ghost locations + auto h_elemBC = 
Kokkos::create_mirror_view(m_elemBC); + auto h_lzetam = Kokkos::create_mirror_view(m_lzetam); + auto h_lzetap = Kokkos::create_mirror_view(m_lzetap); + auto h_letam = Kokkos::create_mirror_view(m_letam); + auto h_letap = Kokkos::create_mirror_view(m_letap); + auto h_lxim = Kokkos::create_mirror_view(m_lxim); + auto h_lxip = Kokkos::create_mirror_view(m_lxip); + Kokkos::deep_copy(h_lzetam, m_lzetam); + Kokkos::deep_copy(h_lzetap, m_lzetap); + Kokkos::deep_copy(h_letam, m_letam); + Kokkos::deep_copy(h_letap, m_letap); + Kokkos::deep_copy(h_lxim, m_lxim); + Kokkos::deep_copy(h_lxip, m_lxip); + + // set up boundary condition information + for(Index_t i = 0; i < numElem(); ++i) + { + h_elemBC(i) = Int_t(0); + } + + for(Index_t i = 0; i < 6; ++i) + { + ghostIdx[i] = INT_MIN; + } + + Int_t pidx = numElem(); + if(m_planeMin != 0) + { + ghostIdx[0] = pidx; + pidx += sizeX() * sizeY(); + } + + if(m_planeMax != 0) + { + ghostIdx[1] = pidx; + pidx += sizeX() * sizeY(); + } + + if(m_rowMin != 0) + { + ghostIdx[2] = pidx; + pidx += sizeX() * sizeZ(); + } + + if(m_rowMax != 0) + { + ghostIdx[3] = pidx; + pidx += sizeX() * sizeZ(); + } + + if(m_colMin != 0) + { + ghostIdx[4] = pidx; + pidx += sizeY() * sizeZ(); + } + + if(m_colMax != 0) + { + ghostIdx[5] = pidx; + } + + // symmetry plane or free surface BCs + for(Index_t i = 0; i < edgeElems; ++i) + { + Index_t planeInc = i * edgeElems * edgeElems; + Index_t rowInc = i * edgeElems; + for(Index_t j = 0; j < edgeElems; ++j) + { + if(m_planeLoc == 0) + { + h_elemBC(rowInc + j) |= ZETA_M_SYMM; + } + else + { + h_elemBC(rowInc + j) |= ZETA_M_COMM; + h_lzetam(rowInc + j) = ghostIdx[0] + rowInc + j; + } + + if(m_planeLoc == m_tp - 1) + { + h_elemBC(rowInc + j + numElem() - edgeElems * edgeElems) |= ZETA_P_FREE; + } + else + { + h_elemBC(rowInc + j + numElem() - edgeElems * edgeElems) |= ZETA_P_COMM; + h_lzetap(rowInc + j + numElem() - edgeElems * edgeElems) = + ghostIdx[1] + rowInc + j; + } + + if(m_rowLoc == 0) + { + h_elemBC(planeInc + 
j) |= ETA_M_SYMM; + } + else + { + h_elemBC(planeInc + j) |= ETA_M_COMM; + h_letam(planeInc + j) = ghostIdx[2] + rowInc + j; + } + + if(m_rowLoc == m_tp - 1) + { + h_elemBC(planeInc + j + edgeElems * edgeElems - edgeElems) |= ETA_P_FREE; + } + else + { + h_elemBC(planeInc + j + edgeElems * edgeElems - edgeElems) |= ETA_P_COMM; + h_letap(planeInc + j + edgeElems * edgeElems - edgeElems) = + ghostIdx[3] + rowInc + j; + } + + if(m_colLoc == 0) + { + h_elemBC(planeInc + j * edgeElems) |= XI_M_SYMM; + } + else + { + h_elemBC(planeInc + j * edgeElems) |= XI_M_COMM; + h_lxim(planeInc + j * edgeElems) = ghostIdx[4] + rowInc + j; + } + + if(m_colLoc == m_tp - 1) + { + h_elemBC(planeInc + j * edgeElems + edgeElems - 1) |= XI_P_FREE; + } + else + { + h_elemBC(planeInc + j * edgeElems + edgeElems - 1) |= XI_P_COMM; + h_lxip(planeInc + j * edgeElems + edgeElems - 1) = + ghostIdx[5] + rowInc + j; + } + } + } + Kokkos::deep_copy(m_elemBC, h_elemBC); + Kokkos::deep_copy(m_lzetam, h_lzetam); + Kokkos::deep_copy(m_lzetap, h_lzetap); + Kokkos::deep_copy(m_letam, h_letam); + Kokkos::deep_copy(m_letap, h_letap); + Kokkos::deep_copy(m_lxim, h_lxim); + Kokkos::deep_copy(m_lxip, h_lxip); +} + +/////////////////////////////////////////////////////////////////////////// +void +InitMeshDecomp(Int_t numRanks, Int_t myRank, Int_t* col, Int_t* row, Int_t* plane, + Int_t* side) +{ + Int_t testProcs; + Int_t dx, dy, dz; + Int_t myDom; + + // Assume cube processor layout for now + testProcs = Int_t(cbrt(Real_t(numRanks)) + 0.5); + if(testProcs * testProcs * testProcs != numRanks) + { + printf("Num processors must be a cube of an integer (1, 8, 27, ...)\n"); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, -1); +#else + exit(-1); +#endif + } + if(sizeof(Real_t) != 4 && sizeof(Real_t) != 8) + { + printf("MPI operations only support float and double right now...\n"); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, -1); +#else + exit(-1); +#endif + } + if(MAX_FIELDS_PER_MPI_COMM > CACHE_COHERENCE_PAD_REAL) + { + 
printf("corner element comm buffers too small. Fix code.\n"); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, -1); +#else + exit(-1); +#endif + } + + dx = testProcs; + dy = testProcs; + dz = testProcs; + + // temporary test + if(dx * dy * dz != numRanks) + { + printf("error -- must have as many domains as procs\n"); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, -1); +#else + exit(-1); +#endif + } + Int_t remainder = dx * dy * dz % numRanks; + if(myRank < remainder) + { + myDom = myRank * (1 + (dx * dy * dz / numRanks)); + } + else + { + myDom = remainder * (1 + (dx * dy * dz / numRanks)) + + (myRank - remainder) * (dx * dy * dz / numRanks); + } + + *col = myDom % dx; + *row = (myDom / dx) % dy; + *plane = myDom / (dx * dy); + *side = testProcs; + + return; +} diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh-util.cc b/projects/rocprofiler-systems/examples/lulesh/lulesh-util.cc new file mode 100644 index 0000000000..a8856429c6 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh-util.cc @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#if USE_MPI +# include +#endif +#include "lulesh.h" + +/* Helper function for converting strings to ints, with error checking */ +int +StrToInt(const char* token, int* retVal) +{ + const char* c; + char* endptr; + const int decimal_base = 10; + + if(token == NULL) return 0; + + c = token; + *retVal = (int) strtol(c, &endptr, decimal_base); + if((endptr != c) && ((*endptr == ' ') || (*endptr == '\0'))) + return 1; + else + return 0; +} + +static void +PrintCommandLineOptions(char* execname, int myRank) +{ + if(myRank == 0) + { + printf("Usage: %s [opts]\n", execname); + printf(" where [opts] is one or more of:\n"); + printf(" -q : quiet mode - suppress all stdout\n"); + printf(" -i : number of cycles to run\n"); + printf(" -s : length of cube mesh along side\n"); + printf(" -r : Number of distinct regions (def: 11)\n"); + printf(" -b : Load balance between regions of a domain (def: 1)\n"); + printf(" -c : 
Extra cost of more expensive regions (def: 1)\n"); + printf(" -f : Number of files to split viz dump into (def: " + "(np+10)/9)\n"); + printf(" -p : Print out progress\n"); + printf( + " -v : Output viz file (requires compiling with -DVIZ_MESH\n"); + printf(" -h : This message\n"); + printf("\n\n"); + } +} + +static void +ParseError(const char* message, int myRank) +{ + if(myRank == 0) + { + printf("%s\n", message); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, -1); +#else + exit(-1); +#endif + } +} + +void +ParseCommandLineOptions(int argc, char* argv[], int myRank, struct cmdLineOpts* opts) +{ + if(argc > 1) + { + int i = 1; + + while(i < argc) + { + int ok; + /* -i */ + if(strcmp(argv[i], "-i") == 0) + { + if(i + 1 >= argc) + { + ParseError("Missing integer argument to -i", myRank); + } + ok = StrToInt(argv[i + 1], &(opts->its)); + if(!ok) + { + ParseError("Parse Error on option -i integer value required after " + "argument\n", + myRank); + } + i += 2; + } + /* -s */ + else if(strcmp(argv[i], "-s") == 0) + { + if(i + 1 >= argc) + { + ParseError("Missing integer argument to -s\n", myRank); + } + ok = StrToInt(argv[i + 1], &(opts->nx)); + if(!ok) + { + ParseError("Parse Error on option -s integer value required after " + "argument\n", + myRank); + } + i += 2; + } + /* -r */ + else if(strcmp(argv[i], "-r") == 0) + { + if(i + 1 >= argc) + { + ParseError("Missing integer argument to -r\n", myRank); + } + ok = StrToInt(argv[i + 1], &(opts->numReg)); + if(!ok) + { + ParseError("Parse Error on option -r integer value required after " + "argument\n", + myRank); + } + i += 2; + } + /* -f */ + else if(strcmp(argv[i], "-f") == 0) + { + if(i + 1 >= argc) + { + ParseError("Missing integer argument to -f\n", myRank); + } + ok = StrToInt(argv[i + 1], &(opts->numFiles)); + if(!ok) + { + ParseError("Parse Error on option -f integer value required after " + "argument\n", + myRank); + } + i += 2; + } + /* -p */ + else if(strcmp(argv[i], "-p") == 0) + { + opts->showProg = 1; + i++; + } + 
/* -q */ + else if(strcmp(argv[i], "-q") == 0) + { + opts->quiet = 1; + i++; + } + /* -q */ + else if(strcmp(argv[i], "-a") == 0) + { + opts->do_atomic = 1; + i++; + } + else if(strcmp(argv[i], "-b") == 0) + { + if(i + 1 >= argc) + { + ParseError("Missing integer argument to -b\n", myRank); + } + ok = StrToInt(argv[i + 1], &(opts->balance)); + if(!ok) + { + ParseError("Parse Error on option -b integer value required after " + "argument\n", + myRank); + } + i += 2; + } + else if(strcmp(argv[i], "-c") == 0) + { + if(i + 1 >= argc) + { + ParseError("Missing integer argument to -c\n", myRank); + } + ok = StrToInt(argv[i + 1], &(opts->cost)); + if(!ok) + { + ParseError("Parse Error on option -c integer value required after " + "argument\n", + myRank); + } + i += 2; + } + /* -v */ + else if(strcmp(argv[i], "-v") == 0) + { +#if VIZ_MESH + opts->viz = 1; +#else + ParseError("Use of -v requires compiling with -DVIZ_MESH\n", myRank); +#endif + i++; + } + /* -h */ + else if(strcmp(argv[i], "-h") == 0) + { + PrintCommandLineOptions(argv[0], myRank); +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, 0); +#else + exit(0); +#endif + } + else + { + char msg[80]; + PrintCommandLineOptions(argv[0], myRank); + sprintf(msg, "ERROR: Unknown command line argument: %s\n", argv[i]); + ParseError(msg, myRank); + } + } + } +} + +///////////////////////////////////////////////////////////////////// + +void +VerifyAndWriteFinalOutput(Real_t elapsed_time, Domain& locDom, Int_t nx, Int_t numRanks) +{ + // GrindTime1 only takes a single domain into account, and is thus a good way to + // measure processor speed indepdendent of MPI parallelism. 
GrindTime2 takes into + // account speedups from MPI parallelism + Real_t grindTime1 = ((elapsed_time * 1e6) / locDom.cycle()) / (nx * nx * nx); + Real_t grindTime2 = + ((elapsed_time * 1e6) / locDom.cycle()) / (nx * nx * nx * numRanks); + + Index_t ElemId = 0; + auto h_e = Kokkos::create_mirror_view(locDom.e_view()); + Kokkos::deep_copy(h_e, locDom.e_view()); + printf("Run completed: \n"); + printf(" Problem size = %i \n", nx); + printf(" MPI tasks = %i \n", numRanks); + printf(" Iteration count = %i \n", locDom.cycle()); + printf(" Final Origin Energy = %12.6e \n", h_e(ElemId)); + + Real_t MaxAbsDiff = Real_t(0.0); + Real_t TotalAbsDiff = Real_t(0.0); + Real_t MaxRelDiff = Real_t(0.0); + + for(Index_t j = 0; j < nx; ++j) + { + for(Index_t k = j + 1; k < nx; ++k) + { + Real_t AbsDiff = FABS(h_e(j * nx + k) - h_e(k * nx + j)); + TotalAbsDiff += AbsDiff; + + if(MaxAbsDiff < AbsDiff) MaxAbsDiff = AbsDiff; + + Real_t RelDiff = AbsDiff / h_e(k * nx + j); + + if(MaxRelDiff < RelDiff) MaxRelDiff = RelDiff; + } + } + + // Quick symmetry check + printf(" Testing Plane 0 of Energy Array on rank 0:\n"); + printf(" MaxAbsDiff = %12.6e\n", MaxAbsDiff); + printf(" TotalAbsDiff = %12.6e\n", TotalAbsDiff); + printf(" MaxRelDiff = %12.6e\n\n", MaxRelDiff); + + // Timing information + printf("\nElapsed time = %10.2f (s)\n", elapsed_time); + printf("Grind time (us/z/c) = %10.8g (per dom) (%10.8g overall)\n", grindTime1, + grindTime2); + printf("FOM = %10.8g (z/s)\n\n", + 1000.0 / grindTime2); // zones per second + + return; +} diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh-viz.cc b/projects/rocprofiler-systems/examples/lulesh/lulesh-viz.cc new file mode 100644 index 0000000000..273435c396 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh-viz.cc @@ -0,0 +1,421 @@ +#include "lulesh.h" +#include +#include +#include +#include + +#ifdef VIZ_MESH + +# ifdef __cplusplus +extern "C" +{ +# endif +# include "silo.h" +# if USE_MPI +# include "pmpio.h" +# 
endif +# ifdef __cplusplus +} +# endif + +// Function prototypes +static void +DumpDomainToVisit(DBfile* db, Domain& domain, int myRank); +static + +# if USE_MPI + // For some reason, earlier versions of g++ (e.g. 4.2) won't let me + // put the 'static' qualifier on this prototype, even if it's done + // consistently in the prototype and definition + void + DumpMultiblockObjects(DBfile* db, PMPIO_baton_t* bat, char basename[], int numRanks); + +// Callback prototypes for PMPIO interface (only useful if we're +// running parallel) +static void* +LULESH_PMPIO_Create(const char* fname, const char* dname, void* udata); +static void* +LULESH_PMPIO_Open(const char* fname, const char* dname, PMPIO_iomode_t ioMode, + void* udata); +static void +LULESH_PMPIO_Close(void* file, void* udata); + +# else + void + DumpMultiblockObjects(DBfile* db, char basename[], int numRanks); +# endif + +/**********************************************************************/ +void +DumpToVisit(Domain& domain, int numFiles, int myRank, int numRanks) +{ + char subdirName[32]; + char basename[32]; + DBfile* db; + + sprintf(basename, "lulesh_plot_c%d", domain.cycle()); + sprintf(subdirName, "data_%d", myRank); + +# if USE_MPI + + PMPIO_baton_t* bat = + PMPIO_Init(numFiles, PMPIO_WRITE, MPI_COMM_WORLD, 10101, LULESH_PMPIO_Create, + LULESH_PMPIO_Open, LULESH_PMPIO_Close, NULL); + + int myiorank = PMPIO_GroupRank(bat, myRank); + + char fileName[64]; + + if(myiorank == 0) + strcpy(fileName, basename); + else + sprintf(fileName, "%s.%03d", basename, myiorank); + + db = (DBfile*) PMPIO_WaitForBaton(bat, fileName, subdirName); + + DumpDomainToVisit(db, domain, myRank); + + // Processor 0 writes out bit of extra data to its file that + // describes how to stitch all the pieces together + if(myRank == 0) + { + DumpMultiblockObjects(db, bat, basename, numRanks); + } + + PMPIO_HandOffBaton(bat, db); + + PMPIO_Finish(bat); +# else + + db = (DBfile*) DBCreate(basename, DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5X); + 
+ if(db) + { + DBMkDir(db, subdirName); + DBSetDir(db, subdirName); + DumpDomainToVisit(db, domain, myRank); + DumpMultiblockObjects(db, basename, numRanks); + } + else + { + printf("Error writing out viz file - rank %d\n", myRank); + } + +# endif +} + +/**********************************************************************/ + +static void +DumpDomainToVisit(DBfile* db, Domain& domain, int myRank) +{ + int ok = 0; + + /* Create an option list that will give some hints to VisIt for + * printing out the cycle and time in the annotations */ + DBoptlist* optlist; + + /* Write out the mesh connectivity in fully unstructured format */ + int shapetype[1] = { DB_ZONETYPE_HEX }; + int shapesize[1] = { 8 }; + int shapecnt[1] = { domain.numElem() }; + int* conn = Allocate(domain.numElem() * 8); + int ci = 0; + for(int ei = 0; ei < domain.numElem(); ++ei) + { + Index_t* elemToNode = domain.nodelist(ei); + for(int ni = 0; ni < 8; ++ni) + { + conn[ci++] = elemToNode[ni]; + } + } + ok += DBPutZonelist2(db, "connectivity", domain.numElem(), 3, conn, + domain.numElem() * 8, 0, 0, 0, /* Not carrying ghost zones */ + shapetype, shapesize, shapecnt, 1, NULL); + Release(&conn); + + /* Write out the mesh coordinates associated with the mesh */ + const char* coordnames[3] = { "X", "Y", "Z" }; + float* coords[3]; + coords[0] = Allocate(domain.numNode()); + coords[1] = Allocate(domain.numNode()); + coords[2] = Allocate(domain.numNode()); + for(int ni = 0; ni < domain.numNode(); ++ni) + { + coords[0][ni] = float(domain.x(ni)); + coords[1][ni] = float(domain.y(ni)); + coords[2][ni] = float(domain.z(ni)); + } + optlist = DBMakeOptlist(2); + ok += DBAddOption(optlist, DBOPT_DTIME, &domain.time()); + ok += DBAddOption(optlist, DBOPT_CYCLE, &domain.cycle()); + ok += DBPutUcdmesh(db, "mesh", 3, (char**) &coordnames[0], (float**) coords, + domain.numNode(), domain.numElem(), "connectivity", 0, DB_FLOAT, + optlist); + ok += DBFreeOptlist(optlist); + Release(&coords[2]); + Release(&coords[1]); + 
Release(&coords[0]); + + /* Write out the materials */ + int* matnums = Allocate(domain.numReg()); + int dims[1] = { domain.numElem() }; // No mixed elements + for(int i = 0; i < domain.numReg(); ++i) + matnums[i] = i + 1; + + ok += DBPutMaterial(db, "regions", "mesh", domain.numReg(), matnums, + domain.regNumList(), dims, 1, NULL, NULL, NULL, NULL, 0, DB_FLOAT, + NULL); + Release(&matnums); + + /* Write out pressure, energy, relvol, q */ + + float* e = Allocate(domain.numElem()); + for(int ei = 0; ei < domain.numElem(); ++ei) + { + e[ei] = float(domain.e(ei)); + } + ok += DBPutUcdvar1(db, "e", "mesh", e, domain.numElem(), NULL, 0, DB_FLOAT, + DB_ZONECENT, NULL); + Release(&e); + + float* p = Allocate(domain.numElem()); + for(int ei = 0; ei < domain.numElem(); ++ei) + { + p[ei] = float(domain.p(ei)); + } + ok += DBPutUcdvar1(db, "p", "mesh", p, domain.numElem(), NULL, 0, DB_FLOAT, + DB_ZONECENT, NULL); + Release(&p); + + float* v = Allocate(domain.numElem()); + for(int ei = 0; ei < domain.numElem(); ++ei) + { + v[ei] = float(domain.v(ei)); + } + ok += DBPutUcdvar1(db, "v", "mesh", v, domain.numElem(), NULL, 0, DB_FLOAT, + DB_ZONECENT, NULL); + Release(&v); + + float* q = Allocate(domain.numElem()); + for(int ei = 0; ei < domain.numElem(); ++ei) + { + q[ei] = float(domain.q(ei)); + } + ok += DBPutUcdvar1(db, "q", "mesh", q, domain.numElem(), NULL, 0, DB_FLOAT, + DB_ZONECENT, NULL); + Release(&q); + + /* Write out nodal speed, velocities */ + float* zd = Allocate(domain.numNode()); + float* yd = Allocate(domain.numNode()); + float* xd = Allocate(domain.numNode()); + float* speed = Allocate(domain.numNode()); + for(int ni = 0; ni < domain.numNode(); ++ni) + { + xd[ni] = float(domain.xd(ni)); + yd[ni] = float(domain.yd(ni)); + zd[ni] = float(domain.zd(ni)); + speed[ni] = + float(sqrt((xd[ni] * xd[ni]) + (yd[ni] * yd[ni]) + (zd[ni] * zd[ni]))); + } + + ok += DBPutUcdvar1(db, "speed", "mesh", speed, domain.numNode(), NULL, 0, DB_FLOAT, + DB_NODECENT, NULL); + 
Release(&speed); + + ok += DBPutUcdvar1(db, "xd", "mesh", xd, domain.numNode(), NULL, 0, DB_FLOAT, + DB_NODECENT, NULL); + Release(&xd); + + ok += DBPutUcdvar1(db, "yd", "mesh", yd, domain.numNode(), NULL, 0, DB_FLOAT, + DB_NODECENT, NULL); + Release(&yd); + + ok += DBPutUcdvar1(db, "zd", "mesh", zd, domain.numNode(), NULL, 0, DB_FLOAT, + DB_NODECENT, NULL); + Release(&zd); + + if(ok != 0) + { + printf("Error writing out viz file - rank %d\n", myRank); + } +} + +/**********************************************************************/ + +# if USE_MPI +void +DumpMultiblockObjects(DBfile* db, PMPIO_baton_t* bat, char basename[], int numRanks) +# else +void +DumpMultiblockObjects(DBfile* db, char basename[], int numRanks) +# endif +{ + /* MULTIBLOCK objects to tie together multiple files */ + char** multimeshObjs; + char** multimatObjs; + char*** multivarObjs; + int* blockTypes; + int* varTypes; + int ok = 0; + // Make sure this list matches what's written out above + char vars[][10] = { "p", "e", "v", "q", "speed", "xd", "yd", "zd" }; + int numvars = sizeof(vars) / sizeof(vars[0]); + + // Reset to the root directory of the silo file + DBSetDir(db, "/"); + + // Allocate a bunch of space for building up the string names + multimeshObjs = Allocate(numRanks); + multimatObjs = Allocate(numRanks); + multivarObjs = Allocate(numvars); + blockTypes = Allocate(numRanks); + varTypes = Allocate(numRanks); + + for(int v = 0; v < numvars; ++v) + { + multivarObjs[v] = Allocate(numRanks); + } + + for(int i = 0; i < numRanks; ++i) + { + multimeshObjs[i] = Allocate(64); + multimatObjs[i] = Allocate(64); + for(int v = 0; v < numvars; ++v) + { + multivarObjs[v][i] = Allocate(64); + } + blockTypes[i] = DB_UCDMESH; + varTypes[i] = DB_UCDVAR; + } + + // Build up the multiobject names + for(int i = 0; i < numRanks; ++i) + { +# if USE_MPI + int iorank = PMPIO_GroupRank(bat, i); +# else + int iorank = 0; +# endif + + // delete multivarObjs[i]; + if(iorank == 0) + { + snprintf(multimeshObjs[i], 
64, "/data_%d/mesh", i); + snprintf(multimatObjs[i], 64, "/data_%d/regions", i); + for(int v = 0; v < numvars; ++v) + { + snprintf(multivarObjs[v][i], 64, "/data_%d/%s", i, vars[v]); + } + } + else + { + snprintf(multimeshObjs[i], 64, "%s.%03d:/data_%d/mesh", basename, iorank, i); + snprintf(multimatObjs[i], 64, "%s.%03d:/data_%d/regions", basename, iorank, + i); + for(int v = 0; v < numvars; ++v) + { + snprintf(multivarObjs[v][i], 64, "%s.%03d:/data_%d/%s", basename, iorank, + i, vars[v]); + } + } + } + + // Now write out the objects + ok += DBPutMultimesh(db, "mesh", numRanks, (char**) multimeshObjs, blockTypes, NULL); + ok += DBPutMultimat(db, "regions", numRanks, (char**) multimatObjs, NULL); + for(int v = 0; v < numvars; ++v) + { + ok += DBPutMultivar(db, vars[v], numRanks, (char**) multivarObjs[v], varTypes, + NULL); + } + + for(int v = 0; v < numvars; ++v) + { + for(int i = 0; i < numRanks; i++) + { + Release(&multivarObjs[v][i]); + } + Release(&multivarObjs[v]); + } + + // Clean up + for(int i = 0; i < numRanks; i++) + { + Release(&multimeshObjs[i]); + Release(&multimatObjs[i]); + } + Release(&multimeshObjs); + Release(&multimatObjs); + Release(&multivarObjs); + Release(&blockTypes); + Release(&varTypes); + + if(ok != 0) + { + printf("Error writing out multiXXX objs to viz file - rank 0\n"); + } +} + +# if USE_MPI + +/**********************************************************************/ + +static void* +LULESH_PMPIO_Create(const char* fname, const char* dname, void* udata) +{ + /* Create the file */ + DBfile* db = DBCreate(fname, DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5X); + + /* Put the data in a subdirectory, so VisIt only sees the multimesh + * objects we write out in the base file */ + if(db) + { + DBMkDir(db, dname); + DBSetDir(db, dname); + } + return (void*) db; +} + +/**********************************************************************/ + +static void* +LULESH_PMPIO_Open(const char* fname, const char* dname, PMPIO_iomode_t ioMode, + void* udata) +{ + 
/* Open the file */ + DBfile* db = DBOpen(fname, DB_UNKNOWN, DB_APPEND); + + /* Put the data in a subdirectory, so VisIt only sees the multimesh + * objects we write out in the base file */ + if(db) + { + DBMkDir(db, dname); + DBSetDir(db, dname); + } + return (void*) db; +} + +/**********************************************************************/ + +static void +LULESH_PMPIO_Close(void* file, void* udata) +{ + DBfile* db = (DBfile*) file; + if(db) DBClose(db); +} +# endif + +#else + +void +DumpToVisit(Domain& domain, int numFiles, int myRank, int numRanks) +{ + if(myRank == 0) + { + printf("Must enable -DVIZ_MESH at compile time to call DumpDomain\n"); + } +} + +#endif diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh.cc b/projects/rocprofiler-systems/examples/lulesh/lulesh.cc new file mode 100644 index 0000000000..a4eda9d0f0 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh.cc @@ -0,0 +1,2308 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lulesh.h" + +#include "causal.hpp" + +static Kokkos::View buffer; +static size_t buffer_size; +static size_t buffer_offset; +static int do_atomic; + +void +ResizeBuffer(const size_t size) +{ + buffer_offset = 0; + if(size / sizeof(Real_t) + 1 > buffer_size) + { + buffer_size = size / sizeof(Real_t) + 1; + buffer = Kokkos::View("Buffer", buffer_size); + } +} + +template +Type* +AllocateFromBuffer(const Index_t& count) +{ + const Index_t offset = (count * sizeof(Type) + sizeof(Real_t) - 1) / sizeof(Real_t); + Real_t* ptr = buffer.data() + buffer_offset; + buffer_offset += ((offset + 511) / 512) * 512; + return static_cast(ptr); +} + +static inline void +TimeIncrement(Domain& domain) +{ + Real_t targetdt = domain.stoptime() - domain.time(); + + if((domain.dtfixed() <= Real_t(0.0)) && (domain.cycle() != Int_t(0))) + { + // CAUSAL_BEGIN("TimeIncrement_Iteration") + Real_t ratio; + Real_t olddt = domain.deltatime(); 
+ + Real_t gnewdt = Real_t(1.0e+20); + Real_t newdt; + if(domain.dtcourant() < gnewdt) + { + gnewdt = domain.dtcourant() / Real_t(2.0); + } + if(domain.dthydro() < gnewdt) + { + gnewdt = domain.dthydro() * Real_t(2.0) / Real_t(3.0); + } + +#if USE_MPI + MPI_Allreduce(&gnewdt, &newdt, 1, + ((sizeof(Real_t) == 4) ? MPI_FLOAT : MPI_DOUBLE), MPI_MIN, + MPI_COMM_WORLD); +#else + newdt = gnewdt; +#endif + + ratio = newdt / olddt; + if(ratio >= Real_t(1.0)) + { + if(ratio < domain.deltatimemultlb()) + { + newdt = olddt; + } + else if(ratio > domain.deltatimemultub()) + { + newdt = olddt * domain.deltatimemultub(); + } + } + + if(newdt > domain.dtmax()) + { + newdt = domain.dtmax(); + } + domain.deltatime() = newdt; + CAUSAL_PROGRESS_NAMED("TimeIncrement_Iteration"); + // CAUSAL_END("TimeIncrement_Iteration") + } + + if((targetdt > domain.deltatime()) && + (targetdt < (Real_t(4.0) * domain.deltatime() / Real_t(3.0)))) + { + targetdt = Real_t(2.0) * domain.deltatime() / Real_t(3.0); + } + + if(targetdt < domain.deltatime()) + { + domain.deltatime() = targetdt; + } + + domain.time() += domain.deltatime(); + + ++domain.cycle(); +} + +KOKKOS_INLINE_FUNCTION void +CollectDomainNodesToElemNodes(const Domain& domain, const Index_t* elemToNode, + Real_t elemX[8], Real_t elemY[8], Real_t elemZ[8]) +{ + Index_t nd0i = elemToNode[0]; + Index_t nd1i = elemToNode[1]; + Index_t nd2i = elemToNode[2]; + Index_t nd3i = elemToNode[3]; + Index_t nd4i = elemToNode[4]; + Index_t nd5i = elemToNode[5]; + Index_t nd6i = elemToNode[6]; + Index_t nd7i = elemToNode[7]; + + elemX[0] = domain.c_x(nd0i); + elemX[1] = domain.c_x(nd1i); + elemX[2] = domain.c_x(nd2i); + elemX[3] = domain.c_x(nd3i); + elemX[4] = domain.c_x(nd4i); + elemX[5] = domain.c_x(nd5i); + elemX[6] = domain.c_x(nd6i); + elemX[7] = domain.c_x(nd7i); + + elemY[0] = domain.c_y(nd0i); + elemY[1] = domain.c_y(nd1i); + elemY[2] = domain.c_y(nd2i); + elemY[3] = domain.c_y(nd3i); + elemY[4] = domain.c_y(nd4i); + elemY[5] = domain.c_y(nd5i); 
+ elemY[6] = domain.c_y(nd6i); + elemY[7] = domain.c_y(nd7i); + + elemZ[0] = domain.c_z(nd0i); + elemZ[1] = domain.c_z(nd1i); + elemZ[2] = domain.c_z(nd2i); + elemZ[3] = domain.c_z(nd3i); + elemZ[4] = domain.c_z(nd4i); + elemZ[5] = domain.c_z(nd5i); + elemZ[6] = domain.c_z(nd6i); + elemZ[7] = domain.c_z(nd7i); +} + +static inline void +InitStressTermsForElems(Domain& domain, Real_t* sigxx, Real_t* sigyy, Real_t* sigzz, + Index_t numElem) +{ + Kokkos::parallel_for( + "InitStressTermsForElems", numElem, KOKKOS_LAMBDA(const Index_t& i) { + sigxx[i] = sigyy[i] = sigzz[i] = -domain.p(i) - domain.q(i); + }); +} + +KOKKOS_INLINE_FUNCTION void +CalcElemShapeFunctionDerivatives(Real_t const x[], Real_t const y[], Real_t const z[], + Real_t b[][8], Real_t* const volume) +{ + const Real_t x0 = x[0]; + const Real_t x1 = x[1]; + const Real_t x2 = x[2]; + const Real_t x3 = x[3]; + const Real_t x4 = x[4]; + const Real_t x5 = x[5]; + const Real_t x6 = x[6]; + const Real_t x7 = x[7]; + + const Real_t y0 = y[0]; + const Real_t y1 = y[1]; + const Real_t y2 = y[2]; + const Real_t y3 = y[3]; + const Real_t y4 = y[4]; + const Real_t y5 = y[5]; + const Real_t y6 = y[6]; + const Real_t y7 = y[7]; + + const Real_t z0 = z[0]; + const Real_t z1 = z[1]; + const Real_t z2 = z[2]; + const Real_t z3 = z[3]; + const Real_t z4 = z[4]; + const Real_t z5 = z[5]; + const Real_t z6 = z[6]; + const Real_t z7 = z[7]; + + Real_t fjxxi, fjxet, fjxze; + Real_t fjyxi, fjyet, fjyze; + Real_t fjzxi, fjzet, fjzze; + Real_t cjxxi, cjxet, cjxze; + Real_t cjyxi, cjyet, cjyze; + Real_t cjzxi, cjzet, cjzze; + + fjxxi = Real_t(.125) * ((x6 - x0) + (x5 - x3) - (x7 - x1) - (x4 - x2)); + fjxet = Real_t(.125) * ((x6 - x0) - (x5 - x3) + (x7 - x1) - (x4 - x2)); + fjxze = Real_t(.125) * ((x6 - x0) + (x5 - x3) + (x7 - x1) + (x4 - x2)); + + fjyxi = Real_t(.125) * ((y6 - y0) + (y5 - y3) - (y7 - y1) - (y4 - y2)); + fjyet = Real_t(.125) * ((y6 - y0) - (y5 - y3) + (y7 - y1) - (y4 - y2)); + fjyze = Real_t(.125) * ((y6 - y0) + (y5 
- y3) + (y7 - y1) + (y4 - y2)); + + fjzxi = Real_t(.125) * ((z6 - z0) + (z5 - z3) - (z7 - z1) - (z4 - z2)); + fjzet = Real_t(.125) * ((z6 - z0) - (z5 - z3) + (z7 - z1) - (z4 - z2)); + fjzze = Real_t(.125) * ((z6 - z0) + (z5 - z3) + (z7 - z1) + (z4 - z2)); + + cjxxi = (fjyet * fjzze) - (fjzet * fjyze); + cjxet = -(fjyxi * fjzze) + (fjzxi * fjyze); + cjxze = (fjyxi * fjzet) - (fjzxi * fjyet); + + cjyxi = -(fjxet * fjzze) + (fjzet * fjxze); + cjyet = (fjxxi * fjzze) - (fjzxi * fjxze); + cjyze = -(fjxxi * fjzet) + (fjzxi * fjxet); + + cjzxi = (fjxet * fjyze) - (fjyet * fjxze); + cjzet = -(fjxxi * fjyze) + (fjyxi * fjxze); + cjzze = (fjxxi * fjyet) - (fjyxi * fjxet); + + b[0][0] = -cjxxi - cjxet - cjxze; + b[0][1] = cjxxi - cjxet - cjxze; + b[0][2] = cjxxi + cjxet - cjxze; + b[0][3] = -cjxxi + cjxet - cjxze; + b[0][4] = -b[0][2]; + b[0][5] = -b[0][3]; + b[0][6] = -b[0][0]; + b[0][7] = -b[0][1]; + + b[1][0] = -cjyxi - cjyet - cjyze; + b[1][1] = cjyxi - cjyet - cjyze; + b[1][2] = cjyxi + cjyet - cjyze; + b[1][3] = -cjyxi + cjyet - cjyze; + b[1][4] = -b[1][2]; + b[1][5] = -b[1][3]; + b[1][6] = -b[1][0]; + b[1][7] = -b[1][1]; + + b[2][0] = -cjzxi - cjzet - cjzze; + b[2][1] = cjzxi - cjzet - cjzze; + b[2][2] = cjzxi + cjzet - cjzze; + b[2][3] = -cjzxi + cjzet - cjzze; + b[2][4] = -b[2][2]; + b[2][5] = -b[2][3]; + b[2][6] = -b[2][0]; + b[2][7] = -b[2][1]; + + *volume = Real_t(8.) 
* (fjxet * cjxet + fjyet * cjyet + fjzet * cjzet); +} + +KOKKOS_INLINE_FUNCTION void +SumElemFaceNormal(Real_t* normalX0, Real_t* normalY0, Real_t* normalZ0, Real_t* normalX1, + Real_t* normalY1, Real_t* normalZ1, Real_t* normalX2, Real_t* normalY2, + Real_t* normalZ2, Real_t* normalX3, Real_t* normalY3, Real_t* normalZ3, + const Real_t x0, const Real_t y0, const Real_t z0, const Real_t x1, + const Real_t y1, const Real_t z1, const Real_t x2, const Real_t y2, + const Real_t z2, const Real_t x3, const Real_t y3, const Real_t z3) +{ + Real_t bisectX0 = Real_t(0.5) * (x3 + x2 - x1 - x0); + Real_t bisectY0 = Real_t(0.5) * (y3 + y2 - y1 - y0); + Real_t bisectZ0 = Real_t(0.5) * (z3 + z2 - z1 - z0); + Real_t bisectX1 = Real_t(0.5) * (x2 + x1 - x3 - x0); + Real_t bisectY1 = Real_t(0.5) * (y2 + y1 - y3 - y0); + Real_t bisectZ1 = Real_t(0.5) * (z2 + z1 - z3 - z0); + Real_t areaX = Real_t(0.25) * (bisectY0 * bisectZ1 - bisectZ0 * bisectY1); + Real_t areaY = Real_t(0.25) * (bisectZ0 * bisectX1 - bisectX0 * bisectZ1); + Real_t areaZ = Real_t(0.25) * (bisectX0 * bisectY1 - bisectY0 * bisectX1); + + *normalX0 += areaX; + *normalX1 += areaX; + *normalX2 += areaX; + *normalX3 += areaX; + + *normalY0 += areaY; + *normalY1 += areaY; + *normalY2 += areaY; + *normalY3 += areaY; + + *normalZ0 += areaZ; + *normalZ1 += areaZ; + *normalZ2 += areaZ; + *normalZ3 += areaZ; +} + +KOKKOS_INLINE_FUNCTION void +CalcElemNodeNormals(Real_t pfx[8], Real_t pfy[8], Real_t pfz[8], const Real_t x[8], + const Real_t y[8], const Real_t z[8]) +{ + for(Index_t i = 0; i < 8; ++i) + { + pfx[i] = Real_t(0.0); + pfy[i] = Real_t(0.0); + pfz[i] = Real_t(0.0); + } + + SumElemFaceNormal(&pfx[0], &pfy[0], &pfz[0], &pfx[1], &pfy[1], &pfz[1], &pfx[2], + &pfy[2], &pfz[2], &pfx[3], &pfy[3], &pfz[3], x[0], y[0], z[0], x[1], + y[1], z[1], x[2], y[2], z[2], x[3], y[3], z[3]); + + SumElemFaceNormal(&pfx[0], &pfy[0], &pfz[0], &pfx[4], &pfy[4], &pfz[4], &pfx[5], + &pfy[5], &pfz[5], &pfx[1], &pfy[1], &pfz[1], x[0], y[0], z[0], 
x[4], + y[4], z[4], x[5], y[5], z[5], x[1], y[1], z[1]); + + SumElemFaceNormal(&pfx[1], &pfy[1], &pfz[1], &pfx[5], &pfy[5], &pfz[5], &pfx[6], + &pfy[6], &pfz[6], &pfx[2], &pfy[2], &pfz[2], x[1], y[1], z[1], x[5], + y[5], z[5], x[6], y[6], z[6], x[2], y[2], z[2]); + + SumElemFaceNormal(&pfx[2], &pfy[2], &pfz[2], &pfx[6], &pfy[6], &pfz[6], &pfx[7], + &pfy[7], &pfz[7], &pfx[3], &pfy[3], &pfz[3], x[2], y[2], z[2], x[6], + y[6], z[6], x[7], y[7], z[7], x[3], y[3], z[3]); + + SumElemFaceNormal(&pfx[3], &pfy[3], &pfz[3], &pfx[7], &pfy[7], &pfz[7], &pfx[4], + &pfy[4], &pfz[4], &pfx[0], &pfy[0], &pfz[0], x[3], y[3], z[3], x[7], + y[7], z[7], x[4], y[4], z[4], x[0], y[0], z[0]); + + SumElemFaceNormal(&pfx[4], &pfy[4], &pfz[4], &pfx[7], &pfy[7], &pfz[7], &pfx[6], + &pfy[6], &pfz[6], &pfx[5], &pfy[5], &pfz[5], x[4], y[4], z[4], x[7], + y[7], z[7], x[6], y[6], z[6], x[5], y[5], z[5]); +} + +KOKKOS_INLINE_FUNCTION void +SumElemStressesToNodeForces(const Real_t B[][8], const Real_t stress_xx, + const Real_t stress_yy, const Real_t stress_zz, Real_t fx[], + Real_t fy[], Real_t fz[]) +{ + for(Index_t i = 0; i < 8; i++) + { + fx[i] = -(stress_xx * B[0][i]); + fy[i] = -(stress_yy * B[1][i]); + fz[i] = -(stress_zz * B[2][i]); + } +} + +static inline void +IntegrateStressForElems(Domain& domain, Real_t* sigxx, Real_t* sigyy, Real_t* sigzz, + Real_t* determ, Index_t numElem, Index_t numNode) +{ + Index_t numElem8 = numElem * 8; + ResizeBuffer((numElem8 * sizeof(Real_t) + 4096) * 3); + Real_t* fx_elem = AllocateFromBuffer(numElem8); + Real_t* fy_elem = AllocateFromBuffer(numElem8); + Real_t* fz_elem = AllocateFromBuffer(numElem8); + + Kokkos::parallel_for( + "IntegrateStressForElems A", numElem, KOKKOS_LAMBDA(const int k) { + const Index_t* const elemToNode = &domain.nodelist(k, 0); + Real_t B[3][8]; + Real_t x_local[8]; + Real_t y_local[8]; + Real_t z_local[8]; + + CollectDomainNodesToElemNodes(domain, elemToNode, x_local, y_local, z_local); + + CalcElemShapeFunctionDerivatives(x_local, 
y_local, z_local, B, &determ[k]); + + CalcElemNodeNormals(B[0], B[1], B[2], x_local, y_local, z_local); + + SumElemStressesToNodeForces(B, sigxx[k], sigyy[k], sigzz[k], &fx_elem[k * 8], + &fy_elem[k * 8], &fz_elem[k * 8]); + }); + + int team_size = 1; + if(Kokkos::DefaultExecutionSpace().concurrency() > 1024) team_size = 128; + + Kokkos::parallel_for( + "IntegrateStressForElems B", + Kokkos::TeamPolicy<>((numNode + 127) / 128, team_size, 2), + KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<>::member_type& team) { + const Index_t gnode_begin = team.league_rank() * 128; + const Index_t gnode_end = + (gnode_begin + 128 < numNode) ? gnode_begin + 128 : numNode; + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, gnode_begin, gnode_end), + [&](const Index_t& gnode) { + Index_t count = domain.nodeElemCount(gnode); + Index_t* cornerList = domain.nodeElemCornerList(gnode); + reduce_double3 f_tmp; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, count), + [&](const Index_t& i, + reduce_double3& tmp) { // vectorized with ivdep + Index_t elem = cornerList[i]; + tmp.x += fx_elem[elem]; + tmp.y += fy_elem[elem]; + tmp.z += fz_elem[elem]; + }, + f_tmp); + Kokkos::single(Kokkos::PerThread(team), [&]() { + domain.fx(gnode) += f_tmp.x; + domain.fy(gnode) += f_tmp.y; + domain.fz(gnode) += f_tmp.z; + }); + }); + }); +} + +KOKKOS_INLINE_FUNCTION void +VoluDer(const Real_t x0, const Real_t x1, const Real_t x2, const Real_t x3, + const Real_t x4, const Real_t x5, const Real_t y0, const Real_t y1, + const Real_t y2, const Real_t y3, const Real_t y4, const Real_t y5, + const Real_t z0, const Real_t z1, const Real_t z2, const Real_t z3, + const Real_t z4, const Real_t z5, Real_t& dvdx, Real_t& dvdy, Real_t& dvdz) +{ + const Real_t twelfth = Real_t(1.0) / Real_t(12.0); + + dvdx = (y1 + y2) * (z0 + z1) - (y0 + y1) * (z1 + z2) + (y0 + y4) * (z3 + z4) - + (y3 + y4) * (z0 + z4) - (y2 + y5) * (z3 + z5) + (y3 + y5) * (z2 + z5); + dvdy = -(x1 + x2) * (z0 + z1) + (x0 + x1) * (z1 
+ z2) - (x0 + x4) * (z3 + z4) + + (x3 + x4) * (z0 + z4) + (x2 + x5) * (z3 + z5) - (x3 + x5) * (z2 + z5); + + dvdz = -(y1 + y2) * (x0 + x1) + (y0 + y1) * (x1 + x2) - (y0 + y4) * (x3 + x4) + + (y3 + y4) * (x0 + x4) + (y2 + y5) * (x3 + x5) - (y3 + y5) * (x2 + x5); + + dvdx *= twelfth; + dvdy *= twelfth; + dvdz *= twelfth; +} + +KOKKOS_INLINE_FUNCTION +void +CalcElemVolumeDerivative( + const Int_t& i, + const Kokkos::View>& dvdx, + const Kokkos::View>& dvdy, + const Kokkos::View>& dvdz, + const Real_t x[8], const Real_t y[8], const Real_t z[8]) +{ +#pragma nounroll + for(int j = 0; j < 4; j++) + { + VoluDer(x[(j + 1) % 4], x[(j + 2) % 4], x[(j + 3) % 4], x[(j + 0) % 4 + 4], + x[(j + 1) % 4 + 4], x[(j + 3) % 4 + 4], y[(j + 1) % 4], y[(j + 2) % 4], + y[(j + 3) % 4], y[(j + 0) % 4 + 4], y[(j + 1) % 4 + 4], + y[(j + 3) % 4 + 4], z[(j + 1) % 4], z[(j + 2) % 4], z[(j + 3) % 4], + z[(j + 0) % 4 + 4], z[(j + 1) % 4 + 4], z[(j + 3) % 4 + 4], dvdx(i, j), + dvdy(i, j), dvdz(i, j)); + VoluDer(x[(j + 3) % 4 + 4], x[(j + 2) % 4 + 4], x[(j + 1) % 4 + 4], + x[(j + 0) % 4], x[(j + 3) % 4], x[(j + 1) % 4], y[(j + 3) % 4 + 4], + y[(j + 2) % 4 + 4], y[(j + 1) % 4 + 4], y[(j + 0) % 4], y[(j + 3) % 4], + y[(j + 1) % 4], z[(j + 3) % 4 + 4], z[(j + 2) % 4 + 4], + z[(j + 1) % 4 + 4], z[(j + 0) % 4], z[(j + 3) % 4], z[(j + 1) % 4], + dvdx(i, j + 4), dvdy(i, j + 4), dvdz(i, j + 4)); + } +} + +KOKKOS_INLINE_FUNCTION +void +CalcElemFBHourglassForce(const Real_t* xd, const Real_t hourgam[][8], + const Real_t& coefficient, Real_t* hgfx) +{ + Real_t hxx[4]; + for(Index_t i = 0; i < 4; i++) + { + hxx[i] = hourgam[i][0] * xd[0] + hourgam[i][1] * xd[1] + hourgam[i][2] * xd[2] + + hourgam[i][3] * xd[3] + hourgam[i][4] * xd[4] + hourgam[i][5] * xd[5] + + hourgam[i][6] * xd[6] + hourgam[i][7] * xd[7]; + } + for(Index_t i = 0; i < 8; i++) + { + hgfx[i] = coefficient * (hourgam[0][i] * hxx[0] + hourgam[1][i] * hxx[1] + + hourgam[2][i] * hxx[2] + hourgam[3][i] * hxx[3]); + } +} + +struct Gamma +{ + Real_t 
gamma[4][8]; + Gamma() + { + gamma[0][0] = Real_t(1.); + gamma[0][1] = Real_t(1.); + gamma[0][2] = Real_t(-1.); + gamma[0][3] = Real_t(-1.); + gamma[0][4] = Real_t(-1.); + gamma[0][5] = Real_t(-1.); + gamma[0][6] = Real_t(1.); + gamma[0][7] = Real_t(1.); + gamma[1][0] = Real_t(1.); + gamma[1][1] = Real_t(-1.); + gamma[1][2] = Real_t(-1.); + gamma[1][3] = Real_t(1.); + gamma[1][4] = Real_t(-1.); + gamma[1][5] = Real_t(1.); + gamma[1][6] = Real_t(1.); + gamma[1][7] = Real_t(-1.); + gamma[2][0] = Real_t(1.); + gamma[2][1] = Real_t(-1.); + gamma[2][2] = Real_t(1.); + gamma[2][3] = Real_t(-1.); + gamma[2][4] = Real_t(1.); + gamma[2][5] = Real_t(-1.); + gamma[2][6] = Real_t(1.); + gamma[2][7] = Real_t(-1.); + gamma[3][0] = Real_t(-1.); + gamma[3][1] = Real_t(1.); + gamma[3][2] = Real_t(-1.); + gamma[3][3] = Real_t(1.); + gamma[3][4] = Real_t(1.); + gamma[3][5] = Real_t(-1.); + gamma[3][6] = Real_t(1.); + gamma[3][7] = Real_t(-1.); + } +}; + +static inline void +CalcFBHourglassForceForElems( + Domain& domain, Real_t* determ, + const Kokkos::View> x8n, + const Kokkos::View> y8n, + const Kokkos::View> z8n, + const Kokkos::View> dvdx, + const Kokkos::View> dvdy, + const Kokkos::View> dvdz, + Real_t hourg, Index_t numElem, Index_t numNode) +{ + Index_t numElem8 = numElem * 8; + + Real_t* fx_elem; + Real_t* fy_elem; + Real_t* fz_elem; + + if(do_atomic == 0) + { + fx_elem = AllocateFromBuffer(numElem8); + fy_elem = AllocateFromBuffer(numElem8); + fz_elem = AllocateFromBuffer(numElem8); + } + + Gamma G; + + Int_t do_atomic_dev = do_atomic; + + Kokkos::parallel_for( + "CalcFBHourglassForceForElems A", numElem, KOKKOS_LAMBDA(const int& i2) { + Real_t *fx_local, *fy_local, *fz_local; + Real_t hgfx[8]; + + Real_t hourgam[4][8]; + Real_t xd1[8]; + + const Index_t* elemToNode = &domain.nodelist(i2, 0); + Index_t i3 = 8 * i2; + Real_t volinv = Real_t(1.0) / determ[i2]; + + for(Index_t i1 = 0; i1 < 4; ++i1) + { + Real_t hourmodx = 0.0; + for(int j = 0; j < 8; j++) + hourmodx += x8n(i2, 
j) * G.gamma[i1][j]; + + Real_t hourmody = 0.0; + for(int j = 0; j < 8; j++) + hourmody += y8n(i2, j) * G.gamma[i1][j]; + + Real_t hourmodz = 0.0; + for(int j = 0; j < 8; j++) + hourmodz += z8n(i2, j) * G.gamma[i1][j]; + +#pragma ivdep + for(int j = 0; j < 8; j++) + hourgam[i1][j] = G.gamma[i1][j] - volinv * (dvdx(i2, j) * hourmodx + + dvdy(i2, j) * hourmody + + dvdz(i2, j) * hourmodz); + } + + const Real_t ss1 = domain.ss(i2); + const Real_t mass1 = domain.elemMass(i2); + const Real_t volume13 = CBRT(determ[i2]); + + const Index_t n0si2 = elemToNode[0]; + const Index_t n1si2 = elemToNode[1]; + const Index_t n2si2 = elemToNode[2]; + const Index_t n3si2 = elemToNode[3]; + const Index_t n4si2 = elemToNode[4]; + const Index_t n5si2 = elemToNode[5]; + const Index_t n6si2 = elemToNode[6]; + const Index_t n7si2 = elemToNode[7]; + + const Real_t coefficient = -hourg * Real_t(0.01) * ss1 * mass1 / volume13; + + xd1[0] = domain.xd(n0si2); + xd1[1] = domain.xd(n1si2); + xd1[2] = domain.xd(n2si2); + xd1[3] = domain.xd(n3si2); + xd1[4] = domain.xd(n4si2); + xd1[5] = domain.xd(n5si2); + xd1[6] = domain.xd(n6si2); + xd1[7] = domain.xd(n7si2); + + CalcElemFBHourglassForce(xd1, hourgam, coefficient, hgfx); + + if(!do_atomic_dev) + { + fx_local = &fx_elem[i3]; + fx_local[0] = hgfx[0]; + fx_local[1] = hgfx[1]; + fx_local[2] = hgfx[2]; + fx_local[3] = hgfx[3]; + fx_local[4] = hgfx[4]; + fx_local[5] = hgfx[5]; + fx_local[6] = hgfx[6]; + fx_local[7] = hgfx[7]; + } + else + { + Kokkos::atomic_add(&domain.fx(n0si2), hgfx[0]); + Kokkos::atomic_add(&domain.fx(n1si2), hgfx[1]); + Kokkos::atomic_add(&domain.fx(n2si2), hgfx[2]); + Kokkos::atomic_add(&domain.fx(n3si2), hgfx[3]); + Kokkos::atomic_add(&domain.fx(n4si2), hgfx[4]); + Kokkos::atomic_add(&domain.fx(n5si2), hgfx[5]); + Kokkos::atomic_add(&domain.fx(n6si2), hgfx[6]); + Kokkos::atomic_add(&domain.fx(n7si2), hgfx[7]); + } + + xd1[0] = domain.yd(n0si2); + xd1[1] = domain.yd(n1si2); + xd1[2] = domain.yd(n2si2); + xd1[3] = 
domain.yd(n3si2); + xd1[4] = domain.yd(n4si2); + xd1[5] = domain.yd(n5si2); + xd1[6] = domain.yd(n6si2); + xd1[7] = domain.yd(n7si2); + + CalcElemFBHourglassForce(xd1, hourgam, coefficient, hgfx); + + if(!do_atomic_dev) + { + fy_local = &fy_elem[i3]; + fy_local[0] = hgfx[0]; + fy_local[1] = hgfx[1]; + fy_local[2] = hgfx[2]; + fy_local[3] = hgfx[3]; + fy_local[4] = hgfx[4]; + fy_local[5] = hgfx[5]; + fy_local[6] = hgfx[6]; + fy_local[7] = hgfx[7]; + } + else + { + Kokkos::atomic_add(&domain.fy(n0si2), hgfx[0]); + Kokkos::atomic_add(&domain.fy(n1si2), hgfx[1]); + Kokkos::atomic_add(&domain.fy(n2si2), hgfx[2]); + Kokkos::atomic_add(&domain.fy(n3si2), hgfx[3]); + Kokkos::atomic_add(&domain.fy(n4si2), hgfx[4]); + Kokkos::atomic_add(&domain.fy(n5si2), hgfx[5]); + Kokkos::atomic_add(&domain.fy(n6si2), hgfx[6]); + Kokkos::atomic_add(&domain.fy(n7si2), hgfx[7]); + } + + xd1[0] = domain.zd(n0si2); + xd1[1] = domain.zd(n1si2); + xd1[2] = domain.zd(n2si2); + xd1[3] = domain.zd(n3si2); + xd1[4] = domain.zd(n4si2); + xd1[5] = domain.zd(n5si2); + xd1[6] = domain.zd(n6si2); + xd1[7] = domain.zd(n7si2); + + CalcElemFBHourglassForce(xd1, hourgam, coefficient, hgfx); + + if(!do_atomic_dev) + { + fz_local = &fz_elem[i3]; + fz_local[0] = hgfx[0]; + fz_local[1] = hgfx[1]; + fz_local[2] = hgfx[2]; + fz_local[3] = hgfx[3]; + fz_local[4] = hgfx[4]; + fz_local[5] = hgfx[5]; + fz_local[6] = hgfx[6]; + fz_local[7] = hgfx[7]; + } + else + { + Kokkos::atomic_add(&domain.fz(n0si2), hgfx[0]); + Kokkos::atomic_add(&domain.fz(n1si2), hgfx[1]); + Kokkos::atomic_add(&domain.fz(n2si2), hgfx[2]); + Kokkos::atomic_add(&domain.fz(n3si2), hgfx[3]); + Kokkos::atomic_add(&domain.fz(n4si2), hgfx[4]); + Kokkos::atomic_add(&domain.fz(n5si2), hgfx[5]); + Kokkos::atomic_add(&domain.fz(n6si2), hgfx[6]); + Kokkos::atomic_add(&domain.fz(n7si2), hgfx[7]); + } + }); + + if(!do_atomic) + { + int team_size = 1; + if(Kokkos::DefaultExecutionSpace().concurrency() > 1024) team_size = 128; + + Kokkos::parallel_for( + 
"CalcFBHourglassForceForElems B", + Kokkos::TeamPolicy<>((numNode + 127) / 128, team_size, 2), + KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<>::member_type& team) { + const Index_t gnode_begin = team.league_rank() * 128; + const Index_t gnode_end = + (gnode_begin + 128 < numNode) ? gnode_begin + 128 : numNode; + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, gnode_begin, gnode_end), + [&](const Index_t& gnode) { + Index_t count = domain.nodeElemCount(gnode); + Index_t* cornerList = domain.nodeElemCornerList(gnode); + reduce_double3 f_tmp; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, count), + [&](const Index_t& i, + reduce_double3& tmp) { // vectorized with ivdep + Index_t elem = cornerList[i]; + tmp.x += fx_elem[elem]; + tmp.y += fy_elem[elem]; + tmp.z += fz_elem[elem]; + }, + f_tmp); + Kokkos::single(Kokkos::PerThread(team), [&]() { + domain.fx(gnode) += f_tmp.x; + domain.fy(gnode) += f_tmp.y; + domain.fz(gnode) += f_tmp.z; + }); + }); + }); + } +} + +static inline void +CalcHourglassControlForElems(Domain& domain, Real_t determ[], Real_t hgcoef) +{ + Index_t numElem = domain.numElem(); + Index_t numElem8 = numElem * 8; + ResizeBuffer((numElem8 * sizeof(Real_t) + 4096) * (do_atomic ? 
6 : 9)); + + Real_t* dvdx = AllocateFromBuffer(numElem8); + Real_t* dvdy = AllocateFromBuffer(numElem8); + Real_t* dvdz = AllocateFromBuffer(numElem8); + Real_t* x8n = AllocateFromBuffer(numElem8); + Real_t* y8n = AllocateFromBuffer(numElem8); + Real_t* z8n = AllocateFromBuffer(numElem8); + Kokkos::View> v_x8n(x8n, numElem, + 8); + Kokkos::View> v_y8n(y8n, numElem, + 8); + Kokkos::View> v_z8n(z8n, numElem, + 8); + Kokkos::View> v_dvdx(dvdx, numElem, + 8); + Kokkos::View> v_dvdy(dvdy, numElem, + 8); + Kokkos::View> v_dvdz(dvdz, numElem, + 8); + + int error = 0; + Kokkos::parallel_reduce( + "CalcHourglassControlForElems", numElem, + KOKKOS_LAMBDA(const int i, int& err) { + Real_t x1[8], y1[8], z1[8]; + + Index_t* elemToNode = &domain.nodelist(i, 0); + CollectDomainNodesToElemNodes(domain, elemToNode, x1, y1, z1); + + CalcElemVolumeDerivative(i, v_dvdx, v_dvdy, v_dvdz, x1, y1, z1); + + for(Index_t ii = 0; ii < 8; ++ii) + { + v_x8n(i, ii) = x1[ii]; + v_y8n(i, ii) = y1[ii]; + v_z8n(i, ii) = z1[ii]; + } + + determ[i] = domain.volo(i) * domain.v(i); + + if(domain.v(i) <= Real_t(0.0)) + { + err++; + } + }, + error); + + if(error) +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, VolumeError); +#else + exit(VolumeError); +#endif + + if(hgcoef > Real_t(0.)) + { + CalcFBHourglassForceForElems(domain, determ, v_x8n, v_y8n, v_z8n, v_dvdx, v_dvdy, + v_dvdz, hgcoef, numElem, domain.numNode()); + } + + return; +} + +static inline void +CalcVolumeForceForElems(Domain& domain) +{ + Index_t numElem = domain.numElem(); + if(numElem != 0) + { + Real_t hgcoef = domain.hgcoef(); + Kokkos::View sigxx("sigxx", numElem); + Kokkos::View sigyy("sigyy", numElem); + Kokkos::View sigzz("sigzz", numElem); + Kokkos::View determ("determ", numElem); + + InitStressTermsForElems(domain, sigxx.data(), sigyy.data(), sigzz.data(), + numElem); + + IntegrateStressForElems(domain, sigxx.data(), sigyy.data(), sigzz.data(), + determ.data(), numElem, domain.numNode()); + + // check for negative element volume + int 
error = 0; + Kokkos::parallel_reduce( + "CalcVolumeForceForElems", numElem, + KOKKOS_LAMBDA(const int k, int& err) { + if(determ[k] <= Real_t(0.0)) + { + err++; + } + }, + error); + + if(error) +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, VolumeError); +#else + exit(VolumeError); +#endif + + CalcHourglassControlForElems(domain, determ.data(), hgcoef); + } +} + +static inline void +CalcForceForNodes(Domain& domain) +{ + Index_t numNode = domain.numNode(); + +#if USE_MPI + CommRecv(domain, MSG_COMM_SBN, 3, domain.sizeX() + 1, domain.sizeY() + 1, + domain.sizeZ() + 1, true, false); +#endif + + Kokkos::parallel_for( + "CalcForceForNodes", numNode, KOKKOS_LAMBDA(const int i) { + domain.fx(i) = Real_t(0.0); + domain.fy(i) = Real_t(0.0); + domain.fz(i) = Real_t(0.0); + }); + + CalcVolumeForceForElems(domain); + +#if USE_MPI + Domain_member fieldData[3]; + fieldData[0] = &Domain::fx; + fieldData[1] = &Domain::fy; + fieldData[2] = &Domain::fz; + + CommSend(domain, MSG_COMM_SBN, 3, fieldData, domain.sizeX() + 1, domain.sizeY() + 1, + domain.sizeZ() + 1, true, false); + CommSBN(domain, 3, fieldData); +#endif +} + +static inline void +CalcAccelerationForNodes(Domain& domain, Index_t numNode) +{ + Kokkos::parallel_for( + "CalcAccelerationForNodes", numNode, KOKKOS_LAMBDA(const int i) { + domain.xdd(i) = domain.fx(i) / domain.nodalMass(i); + domain.ydd(i) = domain.fy(i) / domain.nodalMass(i); + domain.zdd(i) = domain.fz(i) / domain.nodalMass(i); + }); +} + +static inline void +ApplyAccelerationBoundaryConditionsForNodes(Domain& domain) +{ + Index_t size = domain.sizeX(); + Index_t numNodeBC = (size + 1) * (size + 1); + + if(!domain.symmXempty() != 0) + { + Kokkos::parallel_for( + "ApplyAccelerationBoundaryConditionsForNodes A", numNodeBC, + KOKKOS_LAMBDA(const int i) { domain.xdd(domain.symmX(i)) = Real_t(0.0); }); + } + + if(!domain.symmYempty() != 0) + { + Kokkos::parallel_for( + "ApplyAccelerationBoundaryConditionsForNodes B", numNodeBC, + KOKKOS_LAMBDA(const int i) { 
domain.ydd(domain.symmY(i)) = Real_t(0.0); }); + } + + if(!domain.symmZempty() != 0) + { + Kokkos::parallel_for( + "ApplyAccelerationBoundaryConditionsForNodes C", numNodeBC, + KOKKOS_LAMBDA(const int i) { domain.zdd(domain.symmZ(i)) = Real_t(0.0); }); + } +} + +static inline void +CalcVelocityForNodes(Domain& domain, const Real_t dt, const Real_t u_cut, Index_t numNode) +{ + Kokkos::parallel_for( + "CalcVelocityForNodes", numNode, KOKKOS_LAMBDA(const int i) { + Real_t xdtmp, ydtmp, zdtmp; + + xdtmp = domain.xd(i) + domain.xdd(i) * dt; + if(FABS(xdtmp) < u_cut) xdtmp = Real_t(0.0); + domain.xd(i) = xdtmp; + + ydtmp = domain.yd(i) + domain.ydd(i) * dt; + if(FABS(ydtmp) < u_cut) ydtmp = Real_t(0.0); + domain.yd(i) = ydtmp; + + zdtmp = domain.zd(i) + domain.zdd(i) * dt; + if(FABS(zdtmp) < u_cut) zdtmp = Real_t(0.0); + domain.zd(i) = zdtmp; + }); +} + +static inline void +CalcPositionForNodes(Domain& domain, const Real_t dt, Index_t numNode) +{ + Kokkos::parallel_for( + "CalcPositionForNodes", numNode, KOKKOS_LAMBDA(const int i) { + domain.x(i) += domain.xd(i) * dt; + domain.y(i) += domain.yd(i) * dt; + domain.z(i) += domain.zd(i) * dt; + }); +} + +static inline void +LagrangeNodal(Domain& domain) +{ +#ifdef SEDOV_SYNC_POS_VEL_EARLY + Domain_member fieldData[6]; +#endif + + const Real_t delt = domain.deltatime(); + Real_t u_cut = domain.u_cut(); + + CalcForceForNodes(domain); + +#if USE_MPI +# ifdef SEDOV_SYNC_POS_VEL_EARLY + CommRecv(domain, MSG_SYNC_POS_VEL, 6, domain.sizeX() + 1, domain.sizeY() + 1, + domain.sizeZ() + 1, false, false); +# endif +#endif + + CalcAccelerationForNodes(domain, domain.numNode()); + + ApplyAccelerationBoundaryConditionsForNodes(domain); + + CalcVelocityForNodes(domain, delt, u_cut, domain.numNode()); + + CalcPositionForNodes(domain, delt, domain.numNode()); +#if USE_MPI +# ifdef SEDOV_SYNC_POS_VEL_EARLY + fieldData[0] = &Domain::x; + fieldData[1] = &Domain::y; + fieldData[2] = &Domain::z; + fieldData[3] = &Domain::xd; + fieldData[4] = 
&Domain::yd;
+    fieldData[5] = &Domain::zd;
+
+    CommSend(domain, MSG_SYNC_POS_VEL, 6, fieldData, domain.sizeX() + 1,
+             domain.sizeY() + 1, domain.sizeZ() + 1, false, false);
+    CommSyncPosVel(domain);
+#    endif
+#endif
+
+    return;
+}
+/*
+ * Computes `volume` from the coordinates of eight points (x0..x7, y0..y7,
+ * z0..z7): the sum of three TRIPLE_PRODUCT terms built from coordinate
+ * differences between pairs of those points, multiplied by 1/12.
+ */
+KOKKOS_INLINE_FUNCTION Real_t
+CalcElemVolume(const Real_t x0, const Real_t x1, const Real_t x2, const Real_t x3,
+               const Real_t x4, const Real_t x5, const Real_t x6, const Real_t x7,
+               const Real_t y0, const Real_t y1, const Real_t y2, const Real_t y3,
+               const Real_t y4, const Real_t y5, const Real_t y6, const Real_t y7,
+               const Real_t z0, const Real_t z1, const Real_t z2, const Real_t z3,
+               const Real_t z4, const Real_t z5, const Real_t z6, const Real_t z7)
+{
+    Real_t twelveth = Real_t(1.0) / Real_t(12.0);
+
+    Real_t dx61 = x6 - x1;  // naming: dAB holds (point A) - (point B)
+    Real_t dy61 = y6 - y1;
+    Real_t dz61 = z6 - z1;
+
+    Real_t dx70 = x7 - x0;
+    Real_t dy70 = y7 - y0;
+    Real_t dz70 = z7 - z0;
+
+    Real_t dx63 = x6 - x3;
+    Real_t dy63 = y6 - y3;
+    Real_t dz63 = z6 - z3;
+
+    Real_t dx20 = x2 - x0;
+    Real_t dy20 = y2 - y0;
+    Real_t dz20 = z2 - z0;
+
+    Real_t dx50 = x5 - x0;
+    Real_t dy50 = y5 - y0;
+    Real_t dz50 = z5 - z0;
+
+    Real_t dx64 = x6 - x4;
+    Real_t dy64 = y6 - y4;
+    Real_t dz64 = z6 - z4;
+
+    Real_t dx31 = x3 - x1;
+    Real_t dy31 = y3 - y1;
+    Real_t dz31 = z3 - z1;
+
+    Real_t dx72 = x7 - x2;
+    Real_t dy72 = y7 - y2;
+    Real_t dz72 = z7 - z2;
+
+    Real_t dx43 = x4 - x3;
+    Real_t dy43 = y4 - y3;
+    Real_t dz43 = z4 - z3;
+
+    Real_t dx57 = x5 - x7;
+    Real_t dy57 = y5 - y7;
+    Real_t dz57 = z5 - z7;
+
+    Real_t dx14 = x1 - x4;
+    Real_t dy14 = y1 - y4;
+    Real_t dz14 = z1 - z4;
+
+    Real_t dx25 = x2 - x5;
+    Real_t dy25 = y2 - y5;
+    Real_t dz25 = z2 - z5;
+
+#define TRIPLE_PRODUCT(x1, y1, z1, x2, y2, z2, x3, y3, z3)                               \
+    ((x1) * ((y2) * (z3) - (z2) * (y3)) + (x2) * ((z1) * (y3) - (y1) * (z3)) +           \
+     (x3) * ((y1) * (z2) - (z1) * (y2)))
+
+    Real_t volume = TRIPLE_PRODUCT(dx31 + dx72, dx63, dx20, dy31 + dy72, dy63, dy20,
+                                   dz31 + dz72, dz63, dz20) +
+                    TRIPLE_PRODUCT(dx43 + dx57,
dx64, dx70, dy43 + dy57, dy64, dy70,
+                                   dz43 + dz57, dz64, dz70) +
+                    TRIPLE_PRODUCT(dx14 + dx25, dx61, dx50, dy14 + dy25, dy61, dy50,
+                                   dz14 + dz25, dz61, dz50);
+
+#undef TRIPLE_PRODUCT
+
+    volume *= twelveth;
+
+    return volume;
+}
+/*
+ * Array form of CalcElemVolume: forwards x[0..7], y[0..7] and z[0..7] to
+ * the 24-argument overload and returns its result.
+ */
+KOKKOS_INLINE_FUNCTION
+Real_t
+CalcElemVolume(const Real_t x[8], const Real_t y[8], const Real_t z[8])
+{
+    return CalcElemVolume(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], y[0], y[1],
+                          y[2], y[3], y[4], y[5], y[6], y[7], z[0], z[1], z[2], z[3],
+                          z[4], z[5], z[6], z[7]);
+}
+/*
+ * From four points p0..p3 forms f = (p2 - p0) - (p3 - p1) and
+ * g = (p2 - p0) + (p3 - p1) and returns (f.f) * (g.g) - (f.g)^2.
+ * No square root is taken here.
+ */
+KOKKOS_INLINE_FUNCTION
+Real_t
+AreaFace(const Real_t x0, const Real_t x1, const Real_t x2, const Real_t x3,
+         const Real_t y0, const Real_t y1, const Real_t y2, const Real_t y3,
+         const Real_t z0, const Real_t z1, const Real_t z2, const Real_t z3)
+{
+    Real_t fx = (x2 - x0) - (x3 - x1);
+    Real_t fy = (y2 - y0) - (y3 - y1);
+    Real_t fz = (z2 - z0) - (z3 - z1);
+    Real_t gx = (x2 - x0) + (x3 - x1);
+    Real_t gy = (y2 - y0) + (y3 - y1);
+    Real_t gz = (z2 - z0) + (z3 - z1);
+    Real_t area = (fx * fx + fy * fy + fz * fz) * (gx * gx + gy * gy + gz * gz) -
+                  (fx * gx + fy * gy + fz * gz) * (fx * gx + fy * gy + fz * gz);
+    return area;
+}
+/*
+ * Evaluates AreaFace on six quadruples of the eight points, keeps the
+ * largest value (starting from 0), and returns
+ * 4 * volume / SQRT(largest value).
+ */
+KOKKOS_INLINE_FUNCTION Real_t
+CalcElemCharacteristicLength(const Real_t x[8], const Real_t y[8], const Real_t z[8],
+                             const Real_t volume)
+{
+    Real_t a, charLength = Real_t(0.0);
+
+    a = AreaFace(x[0], x[1], x[2], x[3], y[0], y[1], y[2], y[3], z[0], z[1], z[2], z[3]);
+    charLength = MAX(a, charLength);  // charLength holds the running maximum until the final line of the function
+
+    a = AreaFace(x[4], x[5], x[6], x[7], y[4], y[5], y[6], y[7], z[4], z[5], z[6], z[7]);
+    charLength = MAX(a, charLength);
+
+    a = AreaFace(x[0], x[1], x[5], x[4], y[0], y[1], y[5], y[4], z[0], z[1], z[5], z[4]);
+    charLength = MAX(a, charLength);
+
+    a = AreaFace(x[1], x[2], x[6], x[5], y[1], y[2], y[6], y[5], z[1], z[2], z[6], z[5]);
+    charLength = MAX(a, charLength);
+
+    a = AreaFace(x[2], x[3], x[7], x[6], y[2], y[3], y[7], y[6], z[2], z[3], z[7], z[6]);
+    charLength = MAX(a, charLength);
+
+    a = AreaFace(x[3], x[0],
x[4], x[7], y[3], y[0], y[4], y[7], z[3], z[0], z[4], z[7]);
+    charLength = MAX(a, charLength);
+
+    charLength = Real_t(4.0) * volume / SQRT(charLength);  // from here on charLength is the returned length
+
+    return charLength;
+}
+/*
+ * Fills d[0..5] from nodal velocities and the rows b[0], b[1], b[2].
+ * Every term is a sum over four velocity differences between node pairs
+ * (0,6), (1,7), (2,4), (3,5), weighted by the first four entries of a row
+ * of b and scaled by 1 / detJ.
+ *   d[0], d[1], d[2] : row b[0] with xvel, b[1] with yvel, b[2] with zvel.
+ *   d[3], d[4], d[5] : half the sum of two mixed terms (zy+yz, xz+zx, xy+yx).
+ * detJ is used as a divisor without a zero check.
+ */
+KOKKOS_INLINE_FUNCTION void
+CalcElemVelocityGradient(const Real_t* const xvel, const Real_t* const yvel,
+                         const Real_t* const zvel, const Real_t b[][8], const Real_t detJ,
+                         Real_t* const d)
+{
+    const Real_t inv_detJ = Real_t(1.0) / detJ;
+    Real_t dyddx, dxddy, dzddx, dxddz, dzddy, dyddz;
+    const Real_t* const pfx = b[0];
+    const Real_t* const pfy = b[1];
+    const Real_t* const pfz = b[2];
+
+    d[0] = inv_detJ * (pfx[0] * (xvel[0] - xvel[6]) + pfx[1] * (xvel[1] - xvel[7]) +
+                       pfx[2] * (xvel[2] - xvel[4]) + pfx[3] * (xvel[3] - xvel[5]));
+
+    d[1] = inv_detJ * (pfy[0] * (yvel[0] - yvel[6]) + pfy[1] * (yvel[1] - yvel[7]) +
+                       pfy[2] * (yvel[2] - yvel[4]) + pfy[3] * (yvel[3] - yvel[5]));
+
+    d[2] = inv_detJ * (pfz[0] * (zvel[0] - zvel[6]) + pfz[1] * (zvel[1] - zvel[7]) +
+                       pfz[2] * (zvel[2] - zvel[4]) + pfz[3] * (zvel[3] - zvel[5]));
+
+    dyddx = inv_detJ * (pfx[0] * (yvel[0] - yvel[6]) + pfx[1] * (yvel[1] - yvel[7]) +
+                        pfx[2] * (yvel[2] - yvel[4]) + pfx[3] * (yvel[3] - yvel[5]));
+
+    dxddy = inv_detJ * (pfy[0] * (xvel[0] - xvel[6]) + pfy[1] * (xvel[1] - xvel[7]) +
+                        pfy[2] * (xvel[2] - xvel[4]) + pfy[3] * (xvel[3] - xvel[5]));
+
+    dzddx = inv_detJ * (pfx[0] * (zvel[0] - zvel[6]) + pfx[1] * (zvel[1] - zvel[7]) +
+                        pfx[2] * (zvel[2] - zvel[4]) + pfx[3] * (zvel[3] - zvel[5]));
+
+    dxddz = inv_detJ * (pfz[0] * (xvel[0] - xvel[6]) + pfz[1] * (xvel[1] - xvel[7]) +
+                        pfz[2] * (xvel[2] - xvel[4]) + pfz[3] * (xvel[3] - xvel[5]));
+
+    dzddy = inv_detJ * (pfy[0] * (zvel[0] - zvel[6]) + pfy[1] * (zvel[1] - zvel[7]) +
+                        pfy[2] * (zvel[2] - zvel[4]) + pfy[3] * (zvel[3] - zvel[5]));
+
+    dyddz = inv_detJ * (pfz[0] * (yvel[0] - yvel[6]) + pfz[1] * (yvel[1] - yvel[7]) +
+                        pfz[2] * (yvel[2] - yvel[4]) + pfz[3] * (yvel[3] - yvel[5]));
+    d[5] = Real_t(.5) * (dxddy + dyddx);
+    d[4] = Real_t(.5) * (dxddz +
dzddx);
+    d[3] = Real_t(.5) * (dzddy + dyddz);
+}
+/*
+ * Per element k (0 .. numElem-1):
+ *   - gathers the nodal coordinates and computes the element volume;
+ *   - stores volume / volo(k) in vnew(k) and its difference from v(k)
+ *     in delv(k);
+ *   - stores CalcElemCharacteristicLength(...) in arealg(k);
+ *   - gathers nodal velocities, moves the local coordinates back by
+ *     0.5 * deltaTime * velocity, and evaluates
+ *     CalcElemShapeFunctionDerivatives and CalcElemVelocityGradient there;
+ *   - stores D[0], D[1], D[2] in dxx(k), dyy(k), dzz(k).
+ */
+void
+CalcKinematicsForElems(Domain& domain, Real_t deltaTime, Index_t numElem)
+{
+    Kokkos::parallel_for(
+        "CalcKinematicsForElems", numElem, KOKKOS_LAMBDA(const int k) {
+            Real_t B[3][8];
+            Real_t D[6];
+            Real_t x_local[8];
+            Real_t y_local[8];
+            Real_t z_local[8];
+            Real_t xd_local[8];
+            Real_t yd_local[8];
+            Real_t zd_local[8];
+            Real_t detJ = Real_t(0.0);
+
+            Real_t volume;
+            Real_t relativeVolume;
+            const Index_t* const elemToNode = &domain.nodelist(k, 0);
+
+            CollectDomainNodesToElemNodes(domain, elemToNode, x_local, y_local, z_local);
+
+            volume = CalcElemVolume(x_local, y_local, z_local);  // uses the unshifted coordinates
+            relativeVolume = volume / domain.volo(k);
+            domain.vnew(k) = relativeVolume;
+            domain.delv(k) = relativeVolume - domain.v(k);
+
+            domain.arealg(k) =
+                CalcElemCharacteristicLength(x_local, y_local, z_local, volume);
+
+            for(Index_t lnode = 0; lnode < 8; ++lnode)
+            {
+                Index_t gnode = elemToNode[lnode];
+                xd_local[lnode] = domain.c_xd(gnode);
+                yd_local[lnode] = domain.c_yd(gnode);
+                zd_local[lnode] = domain.c_zd(gnode);
+            }
+
+            Real_t dt2 = Real_t(0.5) * deltaTime;
+            for(Index_t j = 0; j < 8; ++j)  // shift local coordinates back by half a time step
+            {
+                x_local[j] -= dt2 * xd_local[j];
+                y_local[j] -= dt2 * yd_local[j];
+                z_local[j] -= dt2 * zd_local[j];
+            }
+
+            CalcElemShapeFunctionDerivatives(x_local, y_local, z_local, B, &detJ);
+
+            CalcElemVelocityGradient(xd_local, yd_local, zd_local, B, detJ, D);
+
+            domain.dxx(k) = D[0];
+            domain.dyy(k) = D[1];
+            domain.dzz(k) = D[2];
+        });
+}
+/*
+ * For a non-empty domain: calls AllocateStrains and
+ * CalcKinematicsForElems, then per element stores
+ * vdov = dxx + dyy + dzz and subtracts vdov / 3 from each of dxx, dyy and
+ * dzz.  Elements with vnew(k) <= 0 are counted; if any exist the run is
+ * terminated with VolumeError.  DeallocateStrains is called at the end.
+ */
+static inline void
+CalcLagrangeElements(Domain& domain)
+{
+    Index_t numElem = domain.numElem();
+    if(numElem > 0)
+    {
+        const Real_t deltatime = domain.deltatime();
+
+        domain.AllocateStrains(numElem);
+
+        CalcKinematicsForElems(domain, deltatime, numElem);
+
+        int error = 0;  // receives the number of elements with vnew(k) <= 0
+        Kokkos::parallel_reduce(
+            "CalcLagrangeElements", numElem,
+            KOKKOS_LAMBDA(const int k, int& err) {
+                Real_t vdov = domain.dxx(k) + domain.dyy(k) + domain.dzz(k);
+                Real_t vdovthird = vdov / Real_t(3.0);
+
+
domain.vdov(k) = vdov;
+                domain.dxx(k) -= vdovthird;
+                domain.dyy(k) -= vdovthird;
+                domain.dzz(k) -= vdovthird;
+
+                if(domain.vnew(k) <= Real_t(0.0))
+                {
+                    err++;
+                }
+            },
+            error);
+
+        if(error)  // the statement guarded by this if is chosen by the preprocessor below
+#if USE_MPI
+            MPI_Abort(MPI_COMM_WORLD, VolumeError);
+#else
+            exit(VolumeError);
+#endif
+
+        domain.DeallocateStrains();
+    }
+}
+/*
+ * Per element i: reads coordinates and velocities of its eight nodes,
+ * forms three difference vectors (suffixes i, j, k) from quarter sums of
+ * node coordinates, and for each of the zeta, xi and eta directions
+ * writes
+ *   delx_*(i) = vol / SQRT(|a|^2 + ptiny)
+ *   delv_*(i) = (a * norm) . dv
+ * where a is the cross product of two of the difference vectors,
+ * vol = volo(i) * vnew(i), norm = 1 / (vol + ptiny), and dv is the
+ * matching quarter-sum difference of nodal velocities.
+ */
+static inline void
+CalcMonotonicQGradientsForElems(Domain& domain)
+{
+    Index_t numElem = domain.numElem();
+
+    Kokkos::parallel_for(
+        "CalcMonotonicQGradientsForElems", numElem, KOKKOS_LAMBDA(const int i) {
+            const Real_t ptiny = Real_t(1.e-36);  // small additive term used in the divisors below
+            Real_t ax, ay, az;
+            Real_t dxv, dyv, dzv;
+
+            const Index_t* elemToNode = &domain.nodelist(i, 0);
+            Index_t n0 = elemToNode[0];
+            Index_t n1 = elemToNode[1];
+            Index_t n2 = elemToNode[2];
+            Index_t n3 = elemToNode[3];
+            Index_t n4 = elemToNode[4];
+            Index_t n5 = elemToNode[5];
+            Index_t n6 = elemToNode[6];
+            Index_t n7 = elemToNode[7];
+
+            Real_t x0 = domain.x(n0);
+            Real_t x1 = domain.x(n1);
+            Real_t x2 = domain.x(n2);
+            Real_t x3 = domain.x(n3);
+            Real_t x4 = domain.x(n4);
+            Real_t x5 = domain.x(n5);
+            Real_t x6 = domain.x(n6);
+            Real_t x7 = domain.x(n7);
+
+            Real_t y0 = domain.y(n0);
+            Real_t y1 = domain.y(n1);
+            Real_t y2 = domain.y(n2);
+            Real_t y3 = domain.y(n3);
+            Real_t y4 = domain.y(n4);
+            Real_t y5 = domain.y(n5);
+            Real_t y6 = domain.y(n6);
+            Real_t y7 = domain.y(n7);
+
+            Real_t z0 = domain.z(n0);
+            Real_t z1 = domain.z(n1);
+            Real_t z2 = domain.z(n2);
+            Real_t z3 = domain.z(n3);
+            Real_t z4 = domain.z(n4);
+            Real_t z5 = domain.z(n5);
+            Real_t z6 = domain.z(n6);
+            Real_t z7 = domain.z(n7);
+
+            Real_t xv0 = domain.xd(n0);
+            Real_t xv1 = domain.xd(n1);
+            Real_t xv2 = domain.xd(n2);
+            Real_t xv3 = domain.xd(n3);
+            Real_t xv4 = domain.xd(n4);
+            Real_t xv5 = domain.xd(n5);
+            Real_t xv6 = domain.xd(n6);
+            Real_t xv7 = domain.xd(n7);
+
+            Real_t yv0 = domain.yd(n0);
+            Real_t yv1 = domain.yd(n1);
+            Real_t yv2 = domain.yd(n2);
+            Real_t yv3 = domain.yd(n3);
+            Real_t yv4 = domain.yd(n4);
+
Real_t yv5 = domain.yd(n5);
+            Real_t yv6 = domain.yd(n6);
+            Real_t yv7 = domain.yd(n7);
+
+            Real_t zv0 = domain.zd(n0);
+            Real_t zv1 = domain.zd(n1);
+            Real_t zv2 = domain.zd(n2);
+            Real_t zv3 = domain.zd(n3);
+            Real_t zv4 = domain.zd(n4);
+            Real_t zv5 = domain.zd(n5);
+            Real_t zv6 = domain.zd(n6);
+            Real_t zv7 = domain.zd(n7);
+
+            Real_t vol = domain.volo(i) * domain.vnew(i);
+            Real_t norm = Real_t(1.0) / (vol + ptiny);  // ptiny is added to the divisor
+
+            Real_t dxj = Real_t(-0.25) * ((x0 + x1 + x5 + x4) - (x3 + x2 + x6 + x7));  // "j" difference vector
+            Real_t dyj = Real_t(-0.25) * ((y0 + y1 + y5 + y4) - (y3 + y2 + y6 + y7));
+            Real_t dzj = Real_t(-0.25) * ((z0 + z1 + z5 + z4) - (z3 + z2 + z6 + z7));
+
+            Real_t dxi = Real_t(0.25) * ((x1 + x2 + x6 + x5) - (x0 + x3 + x7 + x4));  // "i" difference vector
+            Real_t dyi = Real_t(0.25) * ((y1 + y2 + y6 + y5) - (y0 + y3 + y7 + y4));
+            Real_t dzi = Real_t(0.25) * ((z1 + z2 + z6 + z5) - (z0 + z3 + z7 + z4));
+
+            Real_t dxk = Real_t(0.25) * ((x4 + x5 + x6 + x7) - (x0 + x1 + x2 + x3));  // "k" difference vector
+            Real_t dyk = Real_t(0.25) * ((y4 + y5 + y6 + y7) - (y0 + y1 + y2 + y3));
+            Real_t dzk = Real_t(0.25) * ((z4 + z5 + z6 + z7) - (z0 + z1 + z2 + z3));
+
+            ax = dyi * dzj - dzi * dyj;  // a = (i vector) x (j vector)
+            ay = dzi * dxj - dxi * dzj;
+            az = dxi * dyj - dyi * dxj;
+
+            domain.delx_zeta(i) = vol / SQRT(ax * ax + ay * ay + az * az + ptiny);
+
+            ax *= norm;
+            ay *= norm;
+            az *= norm;
+
+            dxv = Real_t(0.25) * ((xv4 + xv5 + xv6 + xv7) - (xv0 + xv1 + xv2 + xv3));
+            dyv = Real_t(0.25) * ((yv4 + yv5 + yv6 + yv7) - (yv0 + yv1 + yv2 + yv3));
+            dzv = Real_t(0.25) * ((zv4 + zv5 + zv6 + zv7) - (zv0 + zv1 + zv2 + zv3));
+
+            domain.delv_zeta(i) = ax * dxv + ay * dyv + az * dzv;
+
+            ax = dyj * dzk - dzj * dyk;  // a = (j vector) x (k vector)
+            ay = dzj * dxk - dxj * dzk;
+            az = dxj * dyk - dyj * dxk;
+
+            domain.delx_xi(i) = vol / SQRT(ax * ax + ay * ay + az * az + ptiny);
+
+            ax *= norm;
+            ay *= norm;
+            az *= norm;
+
+            dxv = Real_t(0.25) * ((xv1 + xv2 + xv6 + xv5) - (xv0 + xv3 + xv7 + xv4));
+            dyv = Real_t(0.25) * ((yv1 + yv2 + yv6 + yv5) - (yv0 + yv3 + yv7 + yv4));
+            dzv = Real_t(0.25) * ((zv1
+ zv2 + zv6 + zv5) - (zv0 + zv3 + zv7 + zv4));
+
+            domain.delv_xi(i) = ax * dxv + ay * dyv + az * dzv;
+
+            ax = dyk * dzi - dzk * dyi;  // a = (k vector) x (i vector)
+            ay = dzk * dxi - dxk * dzi;
+            az = dxk * dyi - dyk * dxi;
+
+            domain.delx_eta(i) = vol / SQRT(ax * ax + ay * ay + az * az + ptiny);
+
+            ax *= norm;
+            ay *= norm;
+            az *= norm;
+
+            dxv = Real_t(-0.25) * ((xv0 + xv1 + xv5 + xv4) - (xv3 + xv2 + xv6 + xv7));
+            dyv = Real_t(-0.25) * ((yv0 + yv1 + yv5 + yv4) - (yv3 + yv2 + yv6 + yv7));
+            dzv = Real_t(-0.25) * ((zv0 + zv1 + zv5 + zv4) - (zv3 + zv2 + zv6 + zv7));
+
+            domain.delv_eta(i) = ax * dxv + ay * dyv + az * dzv;
+        });
+}
+/*
+ * For every element listed in region r, computes qq and ql.
+ *
+ * For each of the xi, eta and zeta directions a limiter phi is built:
+ * the minus/plus neighbour values of delv_* are chosen by the matching
+ * bits of elemBC (the neighbour's value, the element's own value, or 0),
+ * scaled by 1 / (delv_*(ielem) + ptiny), and phi is their mean limited
+ * from above by each neighbour value times monoq_limiter_mult and then
+ * clamped to [0, monoq_max_slope].
+ *
+ * If vdov(ielem) > 0 both results are 0; otherwise they are computed from
+ * the delv_* * delx_* products (clamped to <= 0), the three limiters,
+ * rho = elemMass / (volo * vnew), and the coefficients qlc_monoq and
+ * qqc_monoq.
+ */
+static inline void
+CalcMonotonicQRegionForElems(Domain& domain, Int_t r, Real_t ptiny)
+{
+    Real_t monoq_limiter_mult = domain.monoq_limiter_mult();
+    Real_t monoq_max_slope = domain.monoq_max_slope();
+    Real_t qlc_monoq = domain.qlc_monoq();
+    Real_t qqc_monoq = domain.qqc_monoq();
+
+    Kokkos::parallel_for(
+        "CalcMonotonicQRegionForElems", domain.regElemSize(r),
+        KOKKOS_LAMBDA(const int i) {
+            Index_t ielem = domain.regElemlist(r, i);  // i indexes the region list, ielem the domain arrays
+            Real_t qlin, qquad;
+            Real_t phixi, phieta, phizeta;
+            Int_t bcMask = domain.elemBC(ielem);
+            Real_t delvm = 0.0, delvp = 0.0;
+
+            Real_t norm = Real_t(1.)
/ (domain.delv_xi(ielem) + ptiny);
+
+            switch(bcMask & XI_M)  // minus-side value for xi, selected by the XI_M bits
+            {
+                case XI_M_COMM:
+                case 0: delvm = domain.delv_xi(domain.lxim(ielem)); break;
+                case XI_M_SYMM: delvm = domain.delv_xi(ielem); break;
+                case XI_M_FREE: delvm = Real_t(0.0); break;
+                default:
+                    printf("Error in switch at %s line %d\n", __FILE__, __LINE__);
+                    delvm = 0;
+                    break;
+            }
+            switch(bcMask & XI_P)  // plus-side value for xi, selected by the XI_P bits
+            {
+                case XI_P_COMM:
+                case 0: delvp = domain.delv_xi(domain.lxip(ielem)); break;
+                case XI_P_SYMM: delvp = domain.delv_xi(ielem); break;
+                case XI_P_FREE: delvp = Real_t(0.0); break;
+                default:
+                    printf("Error in switch at %s line %d\n", __FILE__, __LINE__);
+                    delvp = 0;
+                    break;
+            }
+
+            delvm = delvm * norm;
+            delvp = delvp * norm;
+
+            phixi = Real_t(.5) * (delvm + delvp);  // start from the mean of the two scaled values
+
+            delvm *= monoq_limiter_mult;
+            delvp *= monoq_limiter_mult;
+
+            if(delvm < phixi) phixi = delvm;
+            if(delvp < phixi) phixi = delvp;
+            if(phixi < Real_t(0.)) phixi = Real_t(0.);  // clamp to [0, monoq_max_slope]
+            if(phixi > monoq_max_slope) phixi = monoq_max_slope;
+
+            norm = Real_t(1.)
/ (domain.delv_eta(ielem) + ptiny);
+
+            switch(bcMask & ETA_M)  // minus-side value for eta, selected by the ETA_M bits
+            {
+                case ETA_M_COMM:
+                case 0: delvm = domain.delv_eta(domain.letam(ielem)); break;
+                case ETA_M_SYMM: delvm = domain.delv_eta(ielem); break;
+                case ETA_M_FREE: delvm = Real_t(0.0); break;
+                default:
+                    printf("Error in switch at %s line %d\n", __FILE__, __LINE__);
+                    delvm = 0;
+                    break;
+            }
+            switch(bcMask & ETA_P)  // plus-side value for eta, selected by the ETA_P bits
+            {
+                case ETA_P_COMM:
+                case 0: delvp = domain.delv_eta(domain.letap(ielem)); break;
+                case ETA_P_SYMM: delvp = domain.delv_eta(ielem); break;
+                case ETA_P_FREE: delvp = Real_t(0.0); break;
+                default:
+                    printf("Error in switch at %s line %d\n", __FILE__, __LINE__);
+                    delvp = 0;
+                    break;
+            }
+
+            delvm = delvm * norm;
+            delvp = delvp * norm;
+
+            phieta = Real_t(.5) * (delvm + delvp);  // start from the mean of the two scaled values
+
+            delvm *= monoq_limiter_mult;
+            delvp *= monoq_limiter_mult;
+
+            if(delvm < phieta) phieta = delvm;
+            if(delvp < phieta) phieta = delvp;
+            if(phieta < Real_t(0.)) phieta = Real_t(0.);  // clamp to [0, monoq_max_slope]
+            if(phieta > monoq_max_slope) phieta = monoq_max_slope;
+
+            norm = Real_t(1.)
/ (domain.delv_zeta(ielem) + ptiny);
+
+            switch(bcMask & ZETA_M)  // minus-side value for zeta, selected by the ZETA_M bits
+            {
+                case ZETA_M_COMM:
+                case 0: delvm = domain.delv_zeta(domain.lzetam(ielem)); break;
+                case ZETA_M_SYMM: delvm = domain.delv_zeta(ielem); break;
+                case ZETA_M_FREE: delvm = Real_t(0.0); break;
+                default:
+                    printf("Error in switch at %s line %d\n", __FILE__, __LINE__);
+                    delvm = 0;
+                    break;
+            }
+            switch(bcMask & ZETA_P)  // plus-side value for zeta, selected by the ZETA_P bits
+            {
+                case ZETA_P_COMM:
+                case 0: delvp = domain.delv_zeta(domain.lzetap(ielem)); break;
+                case ZETA_P_SYMM: delvp = domain.delv_zeta(ielem); break;
+                case ZETA_P_FREE: delvp = Real_t(0.0); break;
+                default:
+                    printf("Error in switch at %s line %d\n", __FILE__, __LINE__);
+                    delvp = 0;
+                    break;
+            }
+
+            delvm = delvm * norm;
+            delvp = delvp * norm;
+
+            phizeta = Real_t(.5) * (delvm + delvp);  // start from the mean of the two scaled values
+
+            delvm *= monoq_limiter_mult;
+            delvp *= monoq_limiter_mult;
+
+            if(delvm < phizeta) phizeta = delvm;
+            if(delvp < phizeta) phizeta = delvp;
+            if(phizeta < Real_t(0.)) phizeta = Real_t(0.);  // clamp to [0, monoq_max_slope]
+            if(phizeta > monoq_max_slope) phizeta = monoq_max_slope;
+
+            if(domain.vdov(ielem) > Real_t(0.))  // positive vdov: both results are zero
+            {
+                qlin = Real_t(0.);
+                qquad = Real_t(0.);
+            }
+            else
+            {
+                Real_t delvxxi = domain.delv_xi(ielem) * domain.delx_xi(ielem);
+                Real_t delvxeta = domain.delv_eta(ielem) * domain.delx_eta(ielem);
+                Real_t delvxzeta = domain.delv_zeta(ielem) * domain.delx_zeta(ielem);
+
+                if(delvxxi > Real_t(0.)) delvxxi = Real_t(0.);  // only non-positive products contribute
+                if(delvxeta > Real_t(0.)) delvxeta = Real_t(0.);
+                if(delvxzeta > Real_t(0.)) delvxzeta = Real_t(0.);
+
+                Real_t rho =
+                    domain.elemMass(ielem) / (domain.volo(ielem) * domain.vnew(ielem));
+
+                qlin =
+                    -qlc_monoq * rho *
+                    (delvxxi * (Real_t(1.) - phixi) + delvxeta * (Real_t(1.) - phieta) +
+                     delvxzeta * (Real_t(1.) - phizeta));
+
+                qquad = qqc_monoq * rho *
+                        (delvxxi * delvxxi * (Real_t(1.) - phixi * phixi) +
+                         delvxeta * delvxeta * (Real_t(1.) - phieta * phieta) +
+                         delvxzeta * delvxzeta * (Real_t(1.)
- phizeta * phizeta)); + } + + domain.qq(ielem) = qquad; + domain.ql(ielem) = qlin; + }); +} + +static inline void +CalcMonotonicQForElems(Domain& domain) +{ + const Real_t ptiny = Real_t(1.e-36); + + for(Index_t r = 0; r < domain.numReg(); ++r) + { + if(domain.regElemSize(r) > 0) + { + CalcMonotonicQRegionForElems(domain, r, ptiny); + } + } +} + +static inline void +CalcQForElems(Domain& domain) +{ + Index_t numElem = domain.numElem(); + + if(numElem != 0) + { + Int_t allElem = numElem + /* local elem */ + 2 * domain.sizeX() * domain.sizeY() + /* plane ghosts */ + 2 * domain.sizeX() * domain.sizeZ() + /* row ghosts */ + 2 * domain.sizeY() * domain.sizeZ(); /* col ghosts */ + + domain.AllocateGradients(numElem, allElem); + +#if USE_MPI + CommRecv(domain, MSG_MONOQ, 3, domain.sizeX(), domain.sizeY(), domain.sizeZ(), + true, true); +#endif + CalcMonotonicQGradientsForElems(domain); + +#if USE_MPI + Domain_member fieldData[3]; + + fieldData[0] = &Domain::delv_xi; + fieldData[1] = &Domain::delv_eta; + fieldData[2] = &Domain::delv_zeta; + + CommSend(domain, MSG_MONOQ, 3, fieldData, domain.sizeX(), domain.sizeY(), + domain.sizeZ(), true, true); + + CommMonoQ(domain); +#endif + + CalcMonotonicQForElems(domain); + + domain.DeallocateGradients(); + + Index_t idx = 0; + Kokkos::parallel_reduce( + "CalcQForElems", numElem, + KOKKOS_LAMBDA(const Index_t& i, Index_t& count) { + if(domain.q(i) > domain.qstop()) + { + count++; + } + }, + idx); + + if(idx > 0) + { +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, QStopError); +#else + exit(QStopError); +#endif + } + } +} + +KOKKOS_INLINE_FUNCTION +void +CalcPressureForElem(Real_t& p_new_i, Real_t& bvc_i, Real_t& pbvc_i, const Real_t& e_old_i, + const Real_t& compression_i, const Real_t& vnewc_e, + const Real_t& pmin, const Real_t& p_cut, const Real_t& eosvmax) +{ + const Real_t c1s = Real_t(2.0) / Real_t(3.0); + bvc_i = c1s * (compression_i + Real_t(1.)); + + pbvc_i = c1s; + + p_new_i = bvc_i * e_old_i; + + if(FABS(p_new_i) < p_cut) 
p_new_i = Real_t(0.0);
+
+    if(vnewc_e >= eosvmax) /* impossible condition here? */
+        p_new_i = Real_t(0.0);
+
+    if(p_new_i < pmin) p_new_i = pmin;  // pmin is the lower bound of the result
+}
+/*
+ * For each of the `length` entries i of region r, computes a new energy,
+ * pressure and q in three stages, each followed by a call to
+ * CalcPressureForElem, and writes them to e_new[i], p_new[i] and
+ * q_new[i]; bvc[i] and pbvc[i] receive the values from the last pressure
+ * evaluation.
+ *
+ * Indexing: all pointer arguments are indexed by the region-local index i
+ * except vnewc, which is indexed by domain.regElemlist(r, i).
+ * emin is applied as a lower bound on the energy after every stage;
+ * e_cut and q_cut zero values whose magnitude is below them.
+ */
+static inline void
+CalcEnergyForElems(Real_t* p_new, Real_t* e_new, Real_t* q_new, Real_t* bvc, Real_t* pbvc,
+                   Real_t* p_old, Real_t* e_old, Real_t* q_old, Real_t* compression,
+                   Real_t* compHalfStep, Real_t* vnewc, Real_t* work, Real_t* delvc,
+                   Real_t pmin, Real_t p_cut, Real_t e_cut, Real_t q_cut, Real_t emin,
+                   Real_t* qq_old, Real_t* ql_old, Real_t rho0, Real_t eosvmax,
+                   Index_t length, Domain& domain, Index_t r)
+{
+    Kokkos::parallel_for(
+        "CalcEnergyForElems", length, KOKKOS_LAMBDA(const int i) {
+            const Real_t delvc_i = delvc[i];
+            const Real_t p_old_i = p_old[i];
+            const Real_t q_old_i = q_old[i];
+            Real_t e_new_i = e_old[i] - Real_t(0.5) * delvc_i * (p_old_i + q_old_i) +
+                             Real_t(0.5) * work[i];
+
+            if(e_new_i < emin)
+            {
+                e_new_i = emin;
+            }
+
+            Real_t bvc_i, pbvc_i;
+            Real_t pHalfStep_i;
+            const Real_t vnewc_e = vnewc[domain.regElemlist(r, i)];  // vnewc is indexed by domain element, not by i
+            const Real_t compHalfStep_i = compHalfStep[i];
+            CalcPressureForElem(pHalfStep_i, bvc_i, pbvc_i, e_new_i, compHalfStep_i,
+                                vnewc_e, pmin, p_cut, eosvmax);
+
+            Real_t vhalf = Real_t(1.) / (Real_t(1.)
+ compHalfStep_i);
+
+            Real_t q_new_i;
+            const Real_t ql_old_i = ql_old[i];
+            const Real_t qq_old_i = qq_old[i];
+            if(delvc_i > Real_t(0.))
+            {
+                q_new_i /* = qq_old[i] = ql_old[i] */ = Real_t(0.);
+            }
+            else
+            {
+                Real_t ssc =
+                    (pbvc_i * e_new_i + vhalf * vhalf * bvc_i * pHalfStep_i) / rho0;
+
+                if(ssc <= Real_t(.1111111e-36))  // tiny or negative values get a fixed replacement instead of SQRT
+                {
+                    ssc = Real_t(.3333333e-18);
+                }
+                else
+                {
+                    ssc = SQRT(ssc);
+                }
+
+                q_new_i = (ssc * ql_old_i + qq_old_i);
+            }
+
+            e_new_i = e_new_i + Real_t(0.5) * delvc_i *
+                                    (Real_t(3.0) * (p_old_i + q_old_i) -
+                                     Real_t(4.0) * (pHalfStep_i + q_new_i));
+
+            e_new_i += Real_t(0.5) * work[i];
+
+            if(FABS(e_new_i) < e_cut)
+            {
+                e_new_i = Real_t(0.);
+            }
+            if(e_new_i < emin)
+            {
+                e_new_i = emin;
+            }
+            Real_t p_new_i;
+            const Real_t compression_i = compression[i];
+            CalcPressureForElem(p_new_i, bvc_i, pbvc_i, e_new_i, compression_i, vnewc_e,
+                                pmin, p_cut, eosvmax);
+
+            const Real_t sixth = Real_t(1.0) / Real_t(6.0);
+            Real_t q_tilde;
+
+            if(delvc_i > Real_t(0.))
+            {
+                q_tilde = Real_t(0.);
+            }
+            else
+            {
+                Real_t ssc =
+                    (pbvc_i * e_new_i + vnewc_e * vnewc_e * bvc_i * p_new_i) / rho0;
+
+                if(ssc <= Real_t(.1111111e-36))
+                {
+                    ssc = Real_t(.3333333e-18);
+                }
+                else
+                {
+                    ssc = SQRT(ssc);
+                }
+
+                q_tilde = (ssc * ql_old_i + qq_old_i);
+            }
+
+            e_new_i =
+                e_new_i - (Real_t(7.0) * (p_old_i + q_old_i) -
+                           Real_t(8.0) * (pHalfStep_i + q_new_i) + (p_new_i + q_tilde)) *
+                              delvc_i * sixth;
+
+            if(FABS(e_new_i) < e_cut)
+            {
+                e_new_i = Real_t(0.);
+            }
+            if(e_new_i < emin)
+            {
+                e_new_i = emin;
+            }
+
+            CalcPressureForElem(p_new_i, bvc_i, pbvc_i, e_new_i, compression_i, vnewc_e,
+                                pmin, p_cut, eosvmax);
+            bvc[i] = bvc_i;  // outputs come from this last pressure evaluation
+            pbvc[i] = pbvc_i;
+            p_new[i] = p_new_i;
+
+            if(delvc_i <= Real_t(0.))  // otherwise q_new_i keeps the 0 assigned above
+            {
+                Real_t ssc =
+                    (pbvc_i * e_new_i + vnewc_e * vnewc_e * bvc_i * p_new_i) / rho0;
+
+                if(ssc <= Real_t(.1111111e-36))
+                {
+                    ssc = Real_t(.3333333e-18);
+                }
+                else
+                {
+                    ssc = SQRT(ssc);
+                }
+
+                q_new_i = (ssc * ql_old_i + qq_old_i);
+
+                if(FABS(q_new_i) < q_cut)
q_new_i = Real_t(0.); + } + q_new[i] = q_new_i; + e_new[i] = e_new_i; + }); + + return; +} + +static inline void +CalcSoundSpeedForElems(Domain& domain, Real_t* vnewc, Real_t rho0, Real_t* enewc, + Real_t* pnewc, Real_t* pbvc, Real_t* bvc, Real_t ss4o3, + Index_t len, Index_t r) +{ + Kokkos::parallel_for( + "CalcSoundSpeedForElems", len, KOKKOS_LAMBDA(const int i) { + Index_t ielem = domain.regElemlist(r, i); + Real_t ssTmp = + (pbvc[i] * enewc[i] + vnewc[ielem] * vnewc[ielem] * bvc[i] * pnewc[i]) / + rho0; + if(ssTmp <= Real_t(.1111111e-36)) + { + ssTmp = Real_t(.3333333e-18); + } + else + { + ssTmp = SQRT(ssTmp); + } + domain.ss(ielem) = ssTmp; + }); +} + +static inline void +EvalEOSForElems(Domain& domain, Real_t* vnewc, Int_t numElemReg, Index_t r, Int_t rep) +{ + Real_t e_cut = domain.e_cut(); + Real_t p_cut = domain.p_cut(); + Real_t ss4o3 = domain.ss4o3(); + Real_t q_cut = domain.q_cut(); + + Real_t eosvmax = domain.eosvmax(); + Real_t eosvmin = domain.eosvmin(); + Real_t pmin = domain.pmin(); + Real_t emin = domain.emin(); + Real_t rho0 = domain.refdens(); + + ResizeBuffer((numElemReg * sizeof(Real_t) + 4096) * 16); + + Real_t* e_old = AllocateFromBuffer(numElemReg); + Real_t* delvc = AllocateFromBuffer(numElemReg); + Real_t* p_old = AllocateFromBuffer(numElemReg); + Real_t* q_old = AllocateFromBuffer(numElemReg); + Real_t* compression = AllocateFromBuffer(numElemReg); + Real_t* compHalfStep = AllocateFromBuffer(numElemReg); + Real_t* qq_old = AllocateFromBuffer(numElemReg); + Real_t* ql_old = AllocateFromBuffer(numElemReg); + Real_t* work = AllocateFromBuffer(numElemReg); + Real_t* p_new = AllocateFromBuffer(numElemReg); + Real_t* e_new = AllocateFromBuffer(numElemReg); + Real_t* q_new = AllocateFromBuffer(numElemReg); + Real_t* bvc = AllocateFromBuffer(numElemReg); + Real_t* pbvc = AllocateFromBuffer(numElemReg); + + for(Int_t j = 0; j < rep; j++) + { + Kokkos::parallel_for( + "EvalEOSForElems A", numElemReg, KOKKOS_LAMBDA(const int i) { + Index_t ielem = 
domain.regElemlist(r, i);
+                e_old[i] = domain.c_e(ielem);  // gather region-local copies of the element state
+                delvc[i] = domain.c_delv(ielem);
+                p_old[i] = domain.c_p(ielem);
+                q_old[i] = domain.c_q(ielem);
+                qq_old[i] = domain.c_qq(ielem);
+                ql_old[i] = domain.c_ql(ielem);
+                const Real_t vnewc_ielem = vnewc[ielem];
+                Real_t vchalf;
+                compression[i] = Real_t(1.) / vnewc_ielem - Real_t(1.);
+                vchalf = vnewc_ielem - delvc[i] * Real_t(.5);
+                compHalfStep[i] = Real_t(1.) / vchalf - Real_t(1.);
+
+                if(eosvmin != Real_t(0.))  // a value of 0 disables the eosvmin handling
+                {
+                    if(vnewc_ielem <= eosvmin)
+                    { /* impossible due to calling func? */
+                        compHalfStep[i] = compression[i];
+                    }
+                }
+                if(eosvmax != Real_t(0.))  // a value of 0 disables the eosvmax handling
+                {
+                    if(vnewc_ielem >= eosvmax)
+                    { /* impossible due to calling func? */
+                        p_old[i] = Real_t(0.);
+                        compression[i] = Real_t(0.);
+                        compHalfStep[i] = Real_t(0.);
+                    }
+                }
+                work[i] = Real_t(0.);
+            });
+
+        CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc, p_old, e_old, q_old,
+                           compression, compHalfStep, vnewc, work, delvc, pmin, p_cut,
+                           e_cut, q_cut, emin, qq_old, ql_old, rho0, eosvmax, numElemReg,
+                           domain, r);
+    }
+
+    Kokkos::parallel_for(
+        "EvalEOSForElems F", numElemReg, KOKKOS_LAMBDA(const int i) {
+            Index_t ielem = domain.regElemlist(r, i);
+            domain.p(ielem) = p_new[i];  // write the results back to the domain
+            domain.e(ielem) = e_new[i];
+            domain.q(ielem) = q_new[i];
+        });
+
+    CalcSoundSpeedForElems(domain, vnewc, rho0, e_new, p_new, pbvc, bvc, ss4o3,
+                           numElemReg, r);
+}
+/*
+ * For a non-empty domain: copies vnew into a scratch view vnewc, clamps
+ * it to [eosvmin, eosvmax] (each bound is applied only when it is
+ * non-zero), checks that v, clamped the same way, is positive for every
+ * element (otherwise terminates with VolumeError), and then calls
+ * EvalEOSForElems once per region with a repetition count that depends on
+ * the region index and domain.cost().
+ */
+static inline void
+ApplyMaterialPropertiesForElems(Domain& domain)
+{
+    Index_t numElem = domain.numElem();
+
+    if(numElem != 0)
+    {
+        Real_t eosvmin = domain.eosvmin();
+        Real_t eosvmax = domain.eosvmax();
+        Kokkos::View vnewc("vnewc", numElem);  // NOTE(review): View template arguments appear to be missing here - confirm against upstream
+
+        Kokkos::parallel_for(
+            "ApplyMaterialPropertiesForElems A", numElem,
+            KOKKOS_LAMBDA(const int i) { vnewc[i] = domain.vnew(i); });
+
+        if(eosvmin != Real_t(0.))
+        {
+            Kokkos::parallel_for(
+                "ApplyMaterialPropertiesForElems B", numElem, KOKKOS_LAMBDA(const int i) {
+                    if(vnewc[i] < eosvmin) vnewc[i] = eosvmin;
+                });
+        }
+
+        if(eosvmax != Real_t(0.))
+        {
+
Kokkos::parallel_for( + "ApplyMaterialPropertiesForElems C", numElem, KOKKOS_LAMBDA(const int i) { + if(vnewc[i] > eosvmax) vnewc[i] = eosvmax; + }); + } + + int error = 0; + Kokkos::parallel_reduce( + "ApplyMaterialPropertiesForElems", numElem, + KOKKOS_LAMBDA(const int i, int& err) { + Real_t vc = domain.v(i); + if(eosvmin != Real_t(0.)) + { + if(vc < eosvmin) vc = eosvmin; + } + if(eosvmax != Real_t(0.)) + { + if(vc > eosvmax) vc = eosvmax; + } + if(vc <= 0.) + { + err++; + } + }, + error); + + if(error != 0) +#if USE_MPI + MPI_Abort(MPI_COMM_WORLD, VolumeError); +#else + exit(VolumeError); +#endif + + for(Int_t r = 0; r < domain.numReg(); r++) + { + Index_t numElemReg = domain.regElemSize(r); + // Index_t *regElemList = domain.regElemlist(r); + Int_t rep; + if(r < domain.numReg() / 2) + rep = 1; + else if(r < (domain.numReg() - (domain.numReg() + 15) / 20)) + rep = 1 + domain.cost(); + else + rep = 10 * (1 + domain.cost()); + EvalEOSForElems(domain, vnewc.data(), numElemReg, r, rep); + } + } +} + +static inline void +UpdateVolumesForElems(Domain& domain, Real_t v_cut, Index_t length) +{ + if(length != 0) + { + Kokkos::parallel_for( + "UpdateVolumesForElems", length, KOKKOS_LAMBDA(const int i) { + Real_t tmpV = domain.vnew(i); + + if(FABS(tmpV - Real_t(1.0)) < v_cut) tmpV = Real_t(1.0); + + domain.v(i) = tmpV; + }); + } + + return; +} + +static inline void +LagrangeElements(Domain& domain, Index_t numElem) +{ + CalcLagrangeElements(domain); + + CalcQForElems(domain); + + ApplyMaterialPropertiesForElems(domain); + + UpdateVolumesForElems(domain, domain.v_cut(), numElem); +} + +static inline void +CalcCourantConstraintForElems(Domain& domain, Index_t length, Index_t r, Real_t qqc, + Real_t& dtcourant) +{ + typedef Kokkos::View view_real_t; + + Real_t qqc2 = Real_t(64.0) * qqc * qqc; + Real_t dtcourant_tmp = dtcourant; + Index_t courant_elem = -1; + + MinFinder result; + + Kokkos::parallel_reduce( + "CalcCourantConstraintForElems", length, + KOKKOS_LAMBDA(const int 
i, MinFinder& minf) { + Index_t indx = domain.regElemlist(r, i); + Real_t dtf = domain.ss(indx) * domain.ss(indx); + + if(domain.vdov(indx) < Real_t(0.)) + { + dtf = dtf + qqc2 * domain.arealg(indx) * domain.arealg(indx) * + domain.vdov(indx) * domain.vdov(indx); + } + + dtf = SQRT(dtf); + dtf = domain.arealg(indx) / dtf; + + MinFinder tmp(dtf, i); + if(domain.vdov(indx) != Real_t(0.)) + { + minf += tmp; + } + }, + result); + + dtcourant_tmp = result.val; + + if(dtcourant_tmp > dtcourant) + { + dtcourant_tmp = dtcourant; + } + + courant_elem = result.i; + + if(courant_elem != -1) + { + dtcourant = dtcourant_tmp; + } + + return; +} + +static inline void +CalcHydroConstraintForElems(Domain& domain, Index_t length, Index_t r, Real_t dvovmax, + Real_t& dthydro) +{ + typedef Kokkos::View view_real_t; + + Real_t dthydro_tmp = dthydro; + Index_t hydro_elem = -1; + MinFinder result; + + Kokkos::parallel_reduce( + "CalcHydroConstraintForElems", length, + KOKKOS_LAMBDA(const int i, MinFinder& minf) { + Index_t indx = domain.regElemlist(r, i); + + if(domain.vdov(indx) != Real_t(0.)) + { + Real_t dtdvov = dvovmax / (FABS(domain.vdov(indx)) + Real_t(1.e-20)); + + MinFinder tmp(dtdvov, i); + if(domain.vdov(indx) != Real_t(0.)) + { + minf += tmp; + } + } + }, + result); + + if(result.val > dthydro) + { + result.val = dthydro; + } + + if(result.i != -1) + { + dthydro = result.val; + } + + return; +} + +static inline void +CalcTimeConstraintsForElems(Domain& domain) +{ + domain.dtcourant() = 1.0e+20; + domain.dthydro() = 1.0e+20; + + for(Index_t r = 0; r < domain.numReg(); ++r) + { + CalcCourantConstraintForElems(domain, domain.regElemSize(r), r, domain.qqc(), + domain.dtcourant()); + + CalcHydroConstraintForElems(domain, domain.regElemSize(r), r, domain.dvovmax(), + domain.dthydro()); + } +} + +static inline void +LagrangeLeapFrog(Domain& domain) +{ +#ifdef SEDOV_SYNC_POS_VEL_LATE + Domain_member fieldData[6]; +#endif + LagrangeNodal(domain); + +#ifdef SEDOV_SYNC_POS_VEL_LATE 
+#endif + LagrangeElements(domain, domain.numElem()); + +#if USE_MPI +# ifdef SEDOV_SYNC_POS_VEL_LATE + CommRecv(domain, MSG_SYNC_POS_VEL, 6, domain.sizeX() + 1, domain.sizeY() + 1, + domain.sizeZ() + 1, false, false); + + fieldData[0] = &Domain::x; + fieldData[1] = &Domain::y; + fieldData[2] = &Domain::z; + fieldData[3] = &Domain::xd; + fieldData[4] = &Domain::yd; + fieldData[5] = &Domain::zd; + + CommSend(domain, MSG_SYNC_POS_VEL, 6, fieldData, domain.sizeX() + 1, + domain.sizeY() + 1, domain.sizeZ() + 1, false, false); +# endif +#endif + + CalcTimeConstraintsForElems(domain); + +#if USE_MPI +# ifdef SEDOV_SYNC_POS_VEL_LATE + CommSyncPosVel(domain); +# endif +#endif +} + +int +main(int argc, char* argv[]) +{ + Int_t numRanks; + Int_t myRank; + struct cmdLineOpts opts; + +#if USE_MPI + Domain_member fieldData; + + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &numRanks); + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); +#else + numRanks = 1; + myRank = 0; +#endif + + Kokkos::initialize(argc, argv); + { + Kokkos::Tools::pushRegion("initialization"); + opts.its = 9999999; + opts.nx = 30; + opts.numReg = 11; + opts.numFiles = (int) (numRanks + 10) / 9; + opts.showProg = 0; + opts.quiet = 0; + opts.viz = 0; + opts.balance = 1; + opts.cost = 1; + opts.do_atomic = 0; + + ParseCommandLineOptions(argc, argv, myRank, &opts); + + if(opts.do_atomic == 1) + do_atomic = 1; + else + do_atomic = 0; + + if((myRank == 0) && (opts.quiet == 0)) + { + printf("Running problem size %d^3 per domain until completion\n", opts.nx); + printf("Num processors: %d\n", numRanks); + printf("Total number of elements: %lld\n\n", + (long long int) numRanks * opts.nx * opts.nx * opts.nx); /* widen before multiplying: the int product overflows for large runs */ + printf("To run other sizes, use -s .\n"); + printf("To run a fixed number of iterations, use -i .\n"); + printf("To run a more or less balanced region set, use -b .\n"); + printf("To change the relative costs of regions, use -c .\n"); + printf("To print out progress, use -p\n"); + printf("To write an output
file for VisIt, use -v\n"); + printf("See help (-h) for more options\n\n"); + } + + Int_t col, row, plane, side; + InitMeshDecomp(numRanks, myRank, &col, &row, &plane, &side); + + // Build the main data structure and initialize it + Domain locDom(numRanks, col, row, plane, opts.nx, side, opts.numReg, opts.balance, + opts.cost); + +#if USE_MPI + fieldData = &Domain::nodalMass; + + // Initial domain boundary communication + CommRecv(locDom, MSG_COMM_SBN, 1, locDom.sizeX() + 1, locDom.sizeY() + 1, + locDom.sizeZ() + 1, true, false); + CommSend(locDom, MSG_COMM_SBN, 1, &fieldData, locDom.sizeX() + 1, + locDom.sizeY() + 1, locDom.sizeZ() + 1, true, false); + CommSBN(locDom, 1, &fieldData); + + // End initialization + MPI_Barrier(MPI_COMM_WORLD); +#endif + + Kokkos::Tools::popRegion(); + +#if USE_MPI + double start = MPI_Wtime(); +#else + timeval start; + gettimeofday(&start, nullptr); +#endif + + uint32_t _time_incrp = 0; + uint32_t _leap_frogp = 0; + Kokkos::Tools::createProfileSection("TimeIncr", &_time_incrp); + Kokkos::Tools::createProfileSection("LeapFrog", &_leap_frogp); + + while((locDom.time() < locDom.stoptime()) && (locDom.cycle() < opts.its)) + { + Kokkos::Tools::startSection(_time_incrp); + // CAUSAL_BEGIN("Iteration") + TimeIncrement(locDom); + Kokkos::Tools::stopSection(_time_incrp); + + Kokkos::Tools::startSection(_leap_frogp); + LagrangeLeapFrog(locDom); + Kokkos::Tools::stopSection(_leap_frogp); + CAUSAL_PROGRESS_NAMED("Iteration") + // CAUSAL_END("Iteration") + + if((opts.showProg != 0) && (opts.quiet == 0) && (myRank == 0)) + { + printf("cycle = %d, time = %e, dt=%e\n", locDom.cycle(), + double(locDom.time()), double(locDom.deltatime())); + } + Kokkos::Tools::markEvent("completed_timestep"); + CAUSAL_PROGRESS + } + + Kokkos::Tools::destroyProfileSection(_time_incrp); + Kokkos::Tools::destroyProfileSection(_leap_frogp); + + double elapsed_time; +#if USE_MPI + elapsed_time = MPI_Wtime() - start; +#else + timeval end; + gettimeofday(&end, NULL); + 
elapsed_time = (double) (end.tv_sec - start.tv_sec) + + ((double) (end.tv_usec - start.tv_usec)) / 1000000; +#endif + double elapsed_timeG; +#if USE_MPI + MPI_Reduce(&elapsed_time, &elapsed_timeG, 1, MPI_DOUBLE, MPI_MAX, 0, + MPI_COMM_WORLD); +#else + elapsed_timeG = elapsed_time; +#endif + + Kokkos::Tools::pushRegion("finalization"); + if(opts.viz) + { + DumpToVisit(locDom, opts.numFiles, myRank, numRanks); + } + + if((myRank == 0) && (opts.quiet == 0)) + { + VerifyAndWriteFinalOutput(elapsed_timeG, locDom, opts.nx, numRanks); + } + Kokkos::Tools::popRegion(); + + buffer = Kokkos::View(); + } + Kokkos::finalize(); +#if USE_MPI + MPI_Finalize(); +#endif + + return 0; +} diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh.h b/projects/rocprofiler-systems/examples/lulesh/lulesh.h new file mode 100644 index 0000000000..4ff3dffa1a --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh.h @@ -0,0 +1,873 @@ + +#if !defined(USE_MPI) +# error "You should specify USE_MPI=0 or USE_MPI=1 on the compile line" +#endif + +// OpenMP will be compiled in if this flag is set to 1 AND the compiler being +// used supports it (i.e. the _OPENMP symbol is defined) +#define USE_OMP 1 + +#if USE_MPI +# include + +/* + define one of these three symbols: + + SEDOV_SYNC_POS_VEL_NONE + SEDOV_SYNC_POS_VEL_EARLY + SEDOV_SYNC_POS_VEL_LATE +*/ + +# define SEDOV_SYNC_POS_VEL_EARLY 1 +#endif + +#include +#include +#include + +#include +#include + +//************************************************** +// Allow flexibility for arithmetic representations +//************************************************** + +#define MAX(a, b) (((a) > (b)) ?
(a) : (b)) + +// Precision specification +typedef float real4; +typedef double real8; +typedef long double real10; // 10 bytes on x86 + +typedef int Index_t; // array subscript and loop index +typedef real8 Real_t; // floating point representation +typedef int Int_t; // integer representation + +enum +{ + VolumeError = -1, + QStopError = -2 +}; + +KOKKOS_INLINE_FUNCTION real4 +SQRT(real4 arg) +{ + return sqrtf(arg); +} +KOKKOS_INLINE_FUNCTION real8 +SQRT(real8 arg) +{ + return sqrt(arg); +} +KOKKOS_INLINE_FUNCTION real10 +SQRT(real10 arg) +{ + return sqrtl(arg); +} + +KOKKOS_INLINE_FUNCTION real4 +CBRT(real4 arg) +{ + return cbrtf(arg); +} +KOKKOS_INLINE_FUNCTION real8 +CBRT(real8 arg) +{ + return cbrt(arg); +} +KOKKOS_INLINE_FUNCTION real10 +CBRT(real10 arg) +{ + return cbrtl(arg); +} + +KOKKOS_INLINE_FUNCTION real4 +FABS(real4 arg) +{ + return fabsf(arg); +} +KOKKOS_INLINE_FUNCTION real8 +FABS(real8 arg) +{ + return fabs(arg); +} +KOKKOS_INLINE_FUNCTION real10 +FABS(real10 arg) +{ + return fabsl(arg); +} + +// Stuff needed for boundary conditions +// 3 flag bits (_SYMM, _FREE, _COMM) on each of 6 hexahedral faces (18 bits) +#define XI_M 0x00007 +#define XI_M_SYMM 0x00001 +#define XI_M_FREE 0x00002 +#define XI_M_COMM 0x00004 + +#define XI_P 0x00038 +#define XI_P_SYMM 0x00008 +#define XI_P_FREE 0x00010 +#define XI_P_COMM 0x00020 + +#define ETA_M 0x001c0 +#define ETA_M_SYMM 0x00040 +#define ETA_M_FREE 0x00080 +#define ETA_M_COMM 0x00100 + +#define ETA_P 0x00e00 +#define ETA_P_SYMM 0x00200 +#define ETA_P_FREE 0x00400 +#define ETA_P_COMM 0x00800 + +#define ZETA_M 0x07000 +#define ZETA_M_SYMM 0x01000 +#define ZETA_M_FREE 0x02000 +#define ZETA_M_COMM 0x04000 + +#define ZETA_P 0x38000 +#define ZETA_P_SYMM 0x08000 +#define ZETA_P_FREE 0x10000 +#define ZETA_P_COMM 0x20000 + +// MPI Message Tags +#define MSG_COMM_SBN 1024 +#define MSG_SYNC_POS_VEL 2048 +#define MSG_MONOQ 3072 + +#define MAX_FIELDS_PER_MPI_COMM 6 + +// Assume 128 byte coherence +// Assume Real_t is an "integral power of 2" bytes wide
+#define CACHE_COHERENCE_PAD_REAL (128 / sizeof(Real_t)) + +#define CACHE_ALIGN_REAL(n) \ + (((n) + (CACHE_COHERENCE_PAD_REAL - 1)) & ~(CACHE_COHERENCE_PAD_REAL - 1)) + +////////////////////////////////////////////////////// +// Primary data structure +////////////////////////////////////////////////////// + +/* + * The implementation of the data abstraction used for lulesh + * resides entirely in the Domain class below. You can change + * grouping and interleaving of fields here to maximize data layout + * efficiency for your underlying architecture or compiler. + * + * For example, fields can be implemented as STL objects or + * raw array pointers. As another example, individual fields + * m_x, m_y, m_z could be bundled into + * + * struct { Real_t x, y, z ; } *m_coord ; + * + * allowing accessor functions such as + * + * "Real_t &x(Index_t idx) { return m_coord[idx].x ; }" + * "Real_t &y(Index_t idx) { return m_coord[idx].y ; }" + * "Real_t &z(Index_t idx) { return m_coord[idx].z ; }" + */ + +class Domain +{ +public: + // Constructor + Domain(Int_t numRanks, Index_t colLoc, Index_t rowLoc, Index_t planeLoc, Index_t nx, + Int_t tp, Int_t nr, Int_t balance, Int_t cost); + + // Destructor + KOKKOS_FUNCTION ~Domain(); + + // + // ALLOCATION + // + + void AllocateNodePersistent(Int_t numNode) // Node-centered + { + Kokkos::resize(m_x, numNode); // coordinates + Kokkos::resize(m_y, numNode); + Kokkos::resize(m_z, numNode); + + Kokkos::resize(m_xd, numNode); // velocities + Kokkos::resize(m_yd, numNode); + Kokkos::resize(m_zd, numNode); + + Kokkos::resize(m_xdd, numNode); // accelerations + Kokkos::resize(m_ydd, numNode); + Kokkos::resize(m_zdd, numNode); + + Kokkos::resize(m_fx, numNode); // forces + Kokkos::resize(m_fy, numNode); + Kokkos::resize(m_fz, numNode); + + Kokkos::resize(m_nodalMass, numNode); // mass + + m_c_x = m_x; + m_c_y = m_y; + m_c_z = m_z; + m_c_xd = m_xd; + m_c_yd = m_yd; + m_c_zd = m_zd; + } + + void AllocateElemPersistent(Int_t numElem) //
Elem-centered + { + Kokkos::resize(m_nodelist, numElem); + + // elem connectivities through face + Kokkos::resize(m_lxim, numElem); + Kokkos::resize(m_lxip, numElem); + Kokkos::resize(m_letam, numElem); + Kokkos::resize(m_letap, numElem); + Kokkos::resize(m_lzetam, numElem); + Kokkos::resize(m_lzetap, numElem); + + Kokkos::resize(m_elemBC, numElem); + + Kokkos::resize(m_e, numElem); + Kokkos::resize(m_p, numElem); + + Kokkos::resize(m_q, numElem); + Kokkos::resize(m_ql, numElem); + Kokkos::resize(m_qq, numElem); + + Kokkos::resize(m_v, numElem); + + Kokkos::resize(m_volo, numElem); + Kokkos::resize(m_delv, numElem); + Kokkos::resize(m_vdov, numElem); + + Kokkos::resize(m_arealg, numElem); + + Kokkos::resize(m_ss, numElem); + + Kokkos::resize(m_elemMass, numElem); + + Kokkos::resize(m_vnew, numElem); + + m_c_e = m_e; + m_c_p = m_p; + m_c_q = m_q; + m_c_ql = m_ql; + m_c_qq = m_qq; + m_c_delv = m_delv; + } + + void AllocateGradients(Int_t numElem, Int_t allElem) + { + // Position gradients + Kokkos::resize(m_delx_xi, numElem); + Kokkos::resize(m_delx_eta, numElem); + Kokkos::resize(m_delx_zeta, numElem); + + // Velocity gradients + Kokkos::resize(m_delv_xi, allElem); + Kokkos::resize(m_delv_eta, allElem); + Kokkos::resize(m_delv_zeta, allElem); + } + + void DeallocateGradients() + { + m_delx_zeta = Kokkos::View(); + m_delx_eta = Kokkos::View(); + m_delx_xi = Kokkos::View(); + + m_delv_zeta = Kokkos::View(); + m_delv_eta = Kokkos::View(); + m_delv_xi = Kokkos::View(); + } + + void AllocateStrains(Int_t numElem) + { + Kokkos::resize(m_dxx, numElem); + Kokkos::resize(m_dyy, numElem); + Kokkos::resize(m_dzz, numElem); + } + + void DeallocateStrains() + { + m_dzz = Kokkos::View(); + m_dyy = Kokkos::View(); + m_dxx = Kokkos::View(); + } + + // + // ACCESSORS + // + KOKKOS_INLINE_FUNCTION + const Kokkos::View& e_view() const { return m_e; } + + // Node-centered + + // Nodal coordinates + KOKKOS_INLINE_FUNCTION Real_t& x(const Index_t idx) const { return m_x[idx]; } + 
KOKKOS_INLINE_FUNCTION Real_t& y(const Index_t idx) const { return m_y[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& z(const Index_t idx) const { return m_z[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_x(const Index_t idx) const { return m_c_x[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_y(const Index_t idx) const { return m_c_y[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_z(const Index_t idx) const { return m_c_z[idx]; } + + // Nodal velocities + KOKKOS_INLINE_FUNCTION Real_t& xd(const Index_t idx) const { return m_xd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& yd(const Index_t idx) const { return m_yd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& zd(const Index_t idx) const { return m_zd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_xd(const Index_t idx) const { return m_c_xd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_yd(const Index_t idx) const { return m_c_yd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_zd(const Index_t idx) const { return m_c_zd[idx]; } + + // Nodal accelerations + KOKKOS_INLINE_FUNCTION Real_t& xdd(const Index_t idx) const { return m_xdd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& ydd(const Index_t idx) const { return m_ydd[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& zdd(const Index_t idx) const { return m_zdd[idx]; } + + // Nodal forces + KOKKOS_INLINE_FUNCTION Real_t& fx(const Index_t idx) const { return m_fx[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& fy(const Index_t idx) const { return m_fy[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& fz(const Index_t idx) const { return m_fz[idx]; } + + // Nodal mass + KOKKOS_INLINE_FUNCTION Real_t& nodalMass(const Index_t idx) const + { + return m_nodalMass[idx]; + } + + // Nodes on symmetry planes + KOKKOS_INLINE_FUNCTION Index_t symmX(const Index_t idx) const { return m_symmX[idx]; } + KOKKOS_INLINE_FUNCTION Index_t symmY(const Index_t idx) const { return m_symmY[idx]; } + KOKKOS_INLINE_FUNCTION Index_t symmZ(const Index_t idx) const { return m_symmZ[idx]; } + KOKKOS_INLINE_FUNCTION bool symmXempty() { return m_symmX.data() == nullptr; } +
KOKKOS_INLINE_FUNCTION bool symmYempty() { return m_symmY.data() == nullptr; } + KOKKOS_INLINE_FUNCTION bool symmZempty() { return m_symmZ.data() == nullptr; } + + // + // Element-centered + // + Index_t& regElemSize(Index_t idx) { return m_regElemSize[idx]; } + Index_t& regNumList(Index_t idx) { return m_regNumList[idx]; } + Index_t* regNumList() { return &m_regNumList[0]; } + Index_t* regElemlist(Int_t r) const + { + return &m_regElemlist.entries(m_regElemlist.row_map(r)); + } + KOKKOS_INLINE_FUNCTION Index_t regElemlist(const Int_t r, Index_t idx) const + { + return m_regElemlist.entries(m_regElemlist.row_map(r) + idx); + } + + KOKKOS_INLINE_FUNCTION Index_t& nodelist(Index_t i, Index_t j) const + { + return m_nodelist(i, j); + } + + // elem connectivities through face + KOKKOS_INLINE_FUNCTION Index_t& lxim(const Index_t idx) const { return m_lxim[idx]; } + KOKKOS_INLINE_FUNCTION Index_t& lxip(const Index_t idx) const { return m_lxip[idx]; } + KOKKOS_INLINE_FUNCTION Index_t& letam(const Index_t idx) const + { + return m_letam[idx]; + } + KOKKOS_INLINE_FUNCTION Index_t& letap(const Index_t idx) const + { + return m_letap[idx]; + } + KOKKOS_INLINE_FUNCTION Index_t& lzetam(const Index_t idx) const + { + return m_lzetam[idx]; + } + KOKKOS_INLINE_FUNCTION Index_t& lzetap(const Index_t idx) const + { + return m_lzetap[idx]; + } + + // elem face symm/free-surface flag + KOKKOS_INLINE_FUNCTION Int_t& elemBC(const Index_t idx) const + { + return m_elemBC[idx]; + } + + // Principal strains - temporary + KOKKOS_INLINE_FUNCTION Real_t& dxx(const Index_t idx) const { return m_dxx[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& dyy(const Index_t idx) const { return m_dyy[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& dzz(const Index_t idx) const { return m_dzz[idx]; } + + // New relative volume - temporary + KOKKOS_INLINE_FUNCTION Real_t& vnew(const Index_t idx) const { return m_vnew[idx]; } + + // Velocity gradient - temporary + KOKKOS_INLINE_FUNCTION Real_t& delv_xi(const Index_t idx) 
const + { + return m_delv_xi[idx]; + } + KOKKOS_INLINE_FUNCTION Real_t& delv_eta(const Index_t idx) const + { + return m_delv_eta[idx]; + } + KOKKOS_INLINE_FUNCTION Real_t& delv_zeta(const Index_t idx) const + { + return m_delv_zeta[idx]; + } + + // Position gradient - temporary + KOKKOS_INLINE_FUNCTION Real_t& delx_xi(const Index_t idx) const + { + return m_delx_xi[idx]; + } + KOKKOS_INLINE_FUNCTION Real_t& delx_eta(const Index_t idx) const + { + return m_delx_eta[idx]; + } + KOKKOS_INLINE_FUNCTION Real_t& delx_zeta(const Index_t idx) const + { + return m_delx_zeta[idx]; + } + // Energy + KOKKOS_INLINE_FUNCTION Real_t& e(const Index_t idx) const { return m_e[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_e(const Index_t idx) const { return m_c_e[idx]; } + + // Pressure + KOKKOS_INLINE_FUNCTION Real_t& p(const Index_t idx) const { return m_p[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_p(const Index_t idx) const { return m_c_p[idx]; } + + // Artificial viscosity + KOKKOS_INLINE_FUNCTION Real_t& q(const Index_t idx) const { return m_q[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_q(const Index_t idx) const { return m_c_q[idx]; } + + // Linear term for q + KOKKOS_INLINE_FUNCTION Real_t& ql(const Index_t idx) const { return m_ql[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_ql(const Index_t idx) const { return m_c_ql[idx]; } + // Quadratic term for q + KOKKOS_INLINE_FUNCTION Real_t& qq(const Index_t idx) const { return m_qq[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_qq(const Index_t idx) const { return m_c_qq[idx]; } + + // Relative volume + KOKKOS_INLINE_FUNCTION Real_t& v(const Index_t idx) const { return m_v[idx]; } + KOKKOS_INLINE_FUNCTION Real_t& delv(const Index_t idx) const { return m_delv[idx]; } + KOKKOS_INLINE_FUNCTION Real_t c_delv(const Index_t idx) const + { + return m_c_delv[idx]; + } + + // Reference volume + KOKKOS_INLINE_FUNCTION Real_t& volo(Index_t idx) const { return m_volo[idx]; } + + // volume derivative over volume + KOKKOS_INLINE_FUNCTION Real_t& vdov(Index_t 
idx) const { return m_vdov[idx]; } + + // Element characteristic length + KOKKOS_INLINE_FUNCTION Real_t& arealg(Index_t idx) const { return m_arealg[idx]; } + + // Sound speed + KOKKOS_INLINE_FUNCTION Real_t& ss(const Index_t idx) const { return m_ss[idx]; } + + // Element mass + KOKKOS_INLINE_FUNCTION Real_t& elemMass(const Index_t idx) const + { + return m_elemMass[idx]; + } + + KOKKOS_INLINE_FUNCTION Index_t nodeElemCount(Index_t idx) const + { + return m_nodeElemStart[idx + 1] - m_nodeElemStart[idx]; + } + + KOKKOS_INLINE_FUNCTION Index_t* nodeElemCornerList(Index_t idx) const + { + return &m_nodeElemCornerList[m_nodeElemStart[idx]]; + } + + // Parameters + + // Cutoffs + KOKKOS_INLINE_FUNCTION Real_t u_cut() const { return m_u_cut; } + KOKKOS_INLINE_FUNCTION Real_t e_cut() const { return m_e_cut; } + KOKKOS_INLINE_FUNCTION Real_t p_cut() const { return m_p_cut; } + KOKKOS_INLINE_FUNCTION Real_t q_cut() const { return m_q_cut; } + KOKKOS_INLINE_FUNCTION Real_t v_cut() const { return m_v_cut; } + + // Other constants (usually are settable via input file in real codes) + KOKKOS_INLINE_FUNCTION Real_t hgcoef() const { return m_hgcoef; } + KOKKOS_INLINE_FUNCTION Real_t qstop() const { return m_qstop; } + KOKKOS_INLINE_FUNCTION Real_t monoq_max_slope() const { return m_monoq_max_slope; } + KOKKOS_INLINE_FUNCTION Real_t monoq_limiter_mult() const + { + return m_monoq_limiter_mult; + } + KOKKOS_INLINE_FUNCTION Real_t ss4o3() const { return m_ss4o3; } + KOKKOS_INLINE_FUNCTION Real_t qlc_monoq() const { return m_qlc_monoq; } + KOKKOS_INLINE_FUNCTION Real_t qqc_monoq() const { return m_qqc_monoq; } + KOKKOS_INLINE_FUNCTION Real_t qqc() const { return m_qqc; } + + KOKKOS_INLINE_FUNCTION Real_t eosvmax() const { return m_eosvmax; } + KOKKOS_INLINE_FUNCTION Real_t eosvmin() const { return m_eosvmin; } + KOKKOS_INLINE_FUNCTION Real_t pmin() const { return m_pmin; } + KOKKOS_INLINE_FUNCTION Real_t emin() const { return m_emin; } + KOKKOS_INLINE_FUNCTION Real_t dvovmax() const 
{ return m_dvovmax; } + KOKKOS_INLINE_FUNCTION Real_t refdens() const { return m_refdens; } + + // Timestep controls, etc... + Real_t& time() { return m_time; } + Real_t& deltatime() { return m_deltatime; } + Real_t& deltatimemultlb() { return m_deltatimemultlb; } + Real_t& deltatimemultub() { return m_deltatimemultub; } + Real_t& stoptime() { return m_stoptime; } + Real_t& dtcourant() { return m_dtcourant; } + Real_t& dthydro() { return m_dthydro; } + Real_t& dtmax() { return m_dtmax; } + Real_t& dtfixed() { return m_dtfixed; } + + Int_t& cycle() { return m_cycle; } + Index_t& numRanks() { return m_numRanks; } + + Index_t& colLoc() { return m_colLoc; } + Index_t& rowLoc() { return m_rowLoc; } + Index_t& planeLoc() { return m_planeLoc; } + Index_t& tp() { return m_tp; } + + Index_t& sizeX() { return m_sizeX; } + Index_t& sizeY() { return m_sizeY; } + Index_t& sizeZ() { return m_sizeZ; } + Index_t& numReg() { return m_numReg; } + Int_t& cost() { return m_cost; } + Index_t& numElem() { return m_numElem; } + Index_t& numNode() { return m_numNode; } + + Index_t& maxPlaneSize() { return m_maxPlaneSize; } + Index_t& maxEdgeSize() { return m_maxEdgeSize; } + + // + // MPI-Related additional data + // + +#if USE_MPI + // Communication Work space + Real_t* commDataSend; + Real_t* commDataRecv; + + // Maximum number of block neighbors + MPI_Request recvRequest[26]; // 6 faces + 12 edges + 8 corners + MPI_Request sendRequest[26]; // 6 faces + 12 edges + 8 corners +#endif + +private: + void BuildMesh(Int_t nx, Int_t edgeNodes, Int_t edgeElems); + void SetupThreadSupportStructures(); + void CreateRegionIndexSets(Int_t nreg, Int_t balance); + void SetupCommBuffers(Int_t edgeNodes); + void SetupSymmetryPlanes(Int_t edgeNodes); + void SetupElementConnectivities(Int_t edgeElems); + void SetupBoundaryConditions(Int_t edgeElems); + + // + // IMPLEMENTATION + // + + /* Node-centered */ + Kokkos::View m_x; /* coordinates */ + Kokkos::View m_y; + Kokkos::View m_z; + Kokkos::View> + 
m_c_x; /* coordinates */ + Kokkos::View> + m_c_y; /* coordinates */ + Kokkos::View> + m_c_z; /* coordinates */ + + Kokkos::View m_xd; /* velocities */ + Kokkos::View m_yd; + Kokkos::View m_zd; + Kokkos::View> + m_c_xd; /* velocities */ + Kokkos::View> + m_c_yd; /* velocities */ + Kokkos::View> + m_c_zd; /* velocities */ + + Kokkos::View m_xdd; /* accelerations */ + Kokkos::View m_ydd; + Kokkos::View m_zdd; + + Kokkos::View m_fx; /* forces */ + Kokkos::View m_fy; + Kokkos::View m_fz; + + Kokkos::View m_nodalMass; /* mass */ + + Kokkos::View m_symmX; /* symmetry plane nodesets */ + Kokkos::View m_symmY; + Kokkos::View m_symmZ; + + // Element-centered + + // Region information + Int_t m_numReg; + Int_t m_cost; // imbalance cost + Index_t* m_regElemSize; // Size of region sets + Index_t* m_regNumList; // Region number per domain element + // Index_t **m_regElemlist; // region indexset + using t_regElemlist = + Kokkos::StaticCrsGraph, Index_t>; + t_regElemlist m_regElemlist; + + Kokkos::View + m_nodelist; /* elemToNode connectivity */ + + Kokkos::View m_lxim; /* element connectivity across each face */ + Kokkos::View m_lxip; + Kokkos::View m_letam; + Kokkos::View m_letap; + Kokkos::View m_lzetam; + Kokkos::View m_lzetap; + + Kokkos::View m_elemBC; /* symmetry/free-surface flags for each elem face */ + + Kokkos::View m_dxx; /* principal strains -- temporary */ + Kokkos::View m_dyy; + Kokkos::View m_dzz; + + Kokkos::View m_delv_xi; /* velocity gradient -- temporary */ + Kokkos::View m_delv_eta; + Kokkos::View m_delv_zeta; + + Kokkos::View m_delx_xi; /* coordinate gradient -- temporary */ + Kokkos::View m_delx_eta; + Kokkos::View m_delx_zeta; + + Kokkos::View m_e; /* energy */ + + Kokkos::View m_p; /* pressure */ + Kokkos::View m_q; /* q */ + Kokkos::View m_ql; /* linear term for q */ + Kokkos::View m_qq; /* quadratic term for q */ + + Kokkos::View m_v; /* relative volume */ + Kokkos::View m_volo; /* reference volume */ + Kokkos::View m_vnew; /* new relative volume --
temporary */ + Kokkos::View m_delv; /* m_vnew - m_v */ + Kokkos::View m_vdov; /* volume derivative over volume */ + + Kokkos::View> + m_c_e; /* energy */ + Kokkos::View> + m_c_p; /* pressure */ + Kokkos::View> + m_c_q; /* q */ + Kokkos::View> + m_c_ql; /* linear term for q */ + Kokkos::View> + m_c_qq; /* quadratic term for q */ + Kokkos::View> + m_c_delv; /* m_vnew - m_v */ + + Kokkos::View m_arealg; /* characteristic length of an element */ + + Kokkos::View m_ss; /* "sound speed" */ + + Kokkos::View m_elemMass; /* mass */ + + // Cutoffs (treat as constants) + const Real_t m_e_cut; // energy tolerance + const Real_t m_p_cut; // pressure tolerance + const Real_t m_q_cut; // q tolerance + const Real_t m_v_cut; // relative volume tolerance + const Real_t m_u_cut; // velocity tolerance + + // Other constants (usually settable, but hardcoded in this proxy app) + + const Real_t m_hgcoef; // hourglass control + const Real_t m_ss4o3; + const Real_t m_qstop; // excessive q indicator + const Real_t m_monoq_max_slope; + const Real_t m_monoq_limiter_mult; + const Real_t m_qlc_monoq; // linear term coef for q + const Real_t m_qqc_monoq; // quadratic term coef for q + const Real_t m_qqc; + const Real_t m_eosvmax; + const Real_t m_eosvmin; + const Real_t m_pmin; // pressure floor + const Real_t m_emin; // energy floor + const Real_t m_dvovmax; // maximum allowable volume change + const Real_t m_refdens; // reference density + + // Variables to keep track of timestep, simulation time, and cycle + Real_t m_dtcourant; // courant constraint + Real_t m_dthydro; // volume change constraint + Int_t m_cycle; // iteration count for simulation + Real_t m_dtfixed; // fixed time increment + Real_t m_time; // current time + Real_t m_deltatime; // variable time increment + Real_t m_deltatimemultlb; + Real_t m_deltatimemultub; + Real_t m_dtmax; // maximum allowable time increment + Real_t m_stoptime; // end time for simulation + + Int_t m_numRanks; + + Index_t m_colLoc; + Index_t m_rowLoc; +
Index_t m_planeLoc; + Index_t m_tp; + + Index_t m_sizeX; + Index_t m_sizeY; + Index_t m_sizeZ; + Index_t m_numElem; + Index_t m_numNode; + + Index_t m_maxPlaneSize; + Index_t m_maxEdgeSize; + + // OMP hack + Kokkos::View m_nodeElemStart; + Kokkos::View m_nodeElemCornerList; + + // Used in setup + Index_t m_rowMin, m_rowMax; + Index_t m_colMin, m_colMax; + Index_t m_planeMin, m_planeMax; +}; +typedef Real_t& (Domain::*Domain_member)(Index_t) const; + +struct cmdLineOpts +{ + Int_t its; // -i + Int_t nx; // -s + Int_t numReg; // -r + Int_t numFiles; // -f + Int_t showProg; // -p + Int_t quiet; // -q + Int_t viz; // -v + Int_t cost; // -c + Int_t balance; // -b + Int_t do_atomic; // -a +}; + +// Function Prototypes + +// lulesh-par +/*Real_t CalcElemVolume( const Real_t x[8], + const Real_t y[8], + const Real_t z[8]);*/ + +// lulesh-util +void +ParseCommandLineOptions(int argc, char* argv[], Int_t myRank, struct cmdLineOpts* opts); +void +VerifyAndWriteFinalOutput(Real_t elapsed_time, Domain& locDom, Int_t nx, Int_t numRanks); + +// lulesh-viz +void +DumpToVisit(Domain& domain, int numFiles, int myRank, int numRanks); + +// lulesh-comm +void +CommRecv(Domain& domain, Int_t msgType, Index_t xferFields, Index_t dx, Index_t dy, + Index_t dz, bool doRecv, bool planeOnly); +void +CommSend(Domain& domain, Int_t msgType, Index_t xferFields, Domain_member* fieldData, + Index_t dx, Index_t dy, Index_t dz, bool doSend, bool planeOnly); +void +CommSBN(Domain& domain, Int_t xferFields, Domain_member* fieldData); +void +CommSyncPosVel(Domain& domain); +void +CommMonoQ(Domain& domain); + +// lulesh-init +void +InitMeshDecomp(Int_t numRanks, Int_t myRank, Int_t* col, Int_t* row, Int_t* plane, + Int_t* side); + +/*********************************/ +/* Data structure implementation */ +/*********************************/ + +/* might want to add access methods so that memory can be */ +/* better managed, as in luleshFT */ + +template +T* +Allocate(size_t size) +{ + return static_cast( 
+ Kokkos::kokkos_malloc(sizeof(T) * size + 8)); +} + +template +void +Release(T** ptr) +{ + if(*ptr != NULL) + { + Kokkos::kokkos_free(*ptr); + *ptr = NULL; + } +} + +struct MinFinder +{ + Real_t val; + int i; + KOKKOS_INLINE_FUNCTION + + MinFinder() + : val(100000000000000000000.0000) + , i(-1) + {} + + KOKKOS_INLINE_FUNCTION + MinFinder(const double& val_, const int& i_) + : val(val_) + , i(i_) + {} + + KOKKOS_INLINE_FUNCTION + MinFinder(const MinFinder& src) + : val(src.val) + , i(src.i) + {} + + // overloading += operator to do the min assignment + KOKKOS_INLINE_FUNCTION + void operator+=(MinFinder& src) + { + if(src.val < val) + { + val = src.val; + i = src.i; + } + } + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile MinFinder& src) volatile + { + if(src.val < val) + { + val = src.val; + i = src.i; + } + } +}; + +struct reduce_double3 +{ + double x, y, z; + KOKKOS_INLINE_FUNCTION + reduce_double3() + { + x = 0.0; + y = 0.0; + z = 0.0; + } + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile reduce_double3& src) volatile + { + x += src.x; + y += src.y; + z += src.z; + } + KOKKOS_INLINE_FUNCTION + void operator+=(const reduce_double3& src) + { + x += src.x; + y += src.y; + z += src.z; + } +}; diff --git a/projects/rocprofiler-systems/examples/lulesh/lulesh_tuple.h b/projects/rocprofiler-systems/examples/lulesh/lulesh_tuple.h new file mode 100644 index 0000000000..922ae91fe3 --- /dev/null +++ b/projects/rocprofiler-systems/examples/lulesh/lulesh_tuple.h @@ -0,0 +1,651 @@ +#if !defined(USE_MPI) +# error "You should specify USE_MPI=0 or USE_MPI=1 on the compile line" +#endif + +// OpenMP will be compiled in if this flag is set to 1 AND the compiler being +// used supports it (i.e.
the _OPENMP symbol is defined) +#define USE_OMP 1 + +#if USE_MPI +# include +#endif + +#include + +/* + define one of these three symbols: + + SEDOV_SYNC_POS_VEL_NONE + SEDOV_SYNC_POS_VEL_EARLY + SEDOV_SYNC_POS_VEL_LATE +*/ + +#define SEDOV_SYNC_POS_VEL_EARLY 1 + +#include +#include + +//************************************************** +// Allow flexibility for arithmetic representations +//************************************************** + +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +// Precision specification +typedef float real4; +typedef double real8; +typedef long double real10; // 10 bytes on x86 + +typedef int Index_t; // array subscript and loop index +typedef real8 Real_t; // floating point representation +typedef int Int_t; // integer representation + +enum +{ + VolumeError = -1, + QStopError = -2 +}; + +inline real4 +SQRT(real4 arg) +{ + return sqrtf(arg); +} +inline real8 +SQRT(real8 arg) +{ + return sqrt(arg); +} +inline real10 +SQRT(real10 arg) +{ + return sqrtl(arg); +} + +inline real4 +CBRT(real4 arg) +{ + return cbrtf(arg); +} +inline real8 +CBRT(real8 arg) +{ + return cbrt(arg); +} +inline real10 +CBRT(real10 arg) +{ + return cbrtl(arg); +} + +inline real4 +FABS(real4 arg) +{ + return fabsf(arg); +} +inline real8 +FABS(real8 arg) +{ + return fabs(arg); +} +inline real10 +FABS(real10 arg) +{ + return fabsl(arg); +} + +// Stuff needed for boundary conditions +// 3 flag bits (_SYMM, _FREE, _COMM) on each of 6 hexahedral faces (18 bits) +#define XI_M 0x00007 +#define XI_M_SYMM 0x00001 +#define XI_M_FREE 0x00002 +#define XI_M_COMM 0x00004 + +#define XI_P 0x00038 +#define XI_P_SYMM 0x00008 +#define XI_P_FREE 0x00010 +#define XI_P_COMM 0x00020 + +#define ETA_M 0x001c0 +#define ETA_M_SYMM 0x00040 +#define ETA_M_FREE 0x00080 +#define ETA_M_COMM 0x00100 + +#define ETA_P 0x00e00 +#define ETA_P_SYMM 0x00200 +#define ETA_P_FREE 0x00400 +#define ETA_P_COMM 0x00800 + +#define ZETA_M 0x07000 +#define ZETA_M_SYMM 0x01000 +#define ZETA_M_FREE 0x02000 +#define ZETA_M_COMM 0x04000 +
+#define ZETA_P 0x38000 +#define ZETA_P_SYMM 0x08000 +#define ZETA_P_FREE 0x10000 +#define ZETA_P_COMM 0x20000 + +// MPI Message Tags +#define MSG_COMM_SBN 1024 +#define MSG_SYNC_POS_VEL 2048 +#define MSG_MONOQ 3072 + +#define MAX_FIELDS_PER_MPI_COMM 6 + +// Assume 128 byte coherence +// Assume Real_t is an "integral power of 2" bytes wide +#define CACHE_COHERENCE_PAD_REAL (128 / sizeof(Real_t)) + +#define CACHE_ALIGN_REAL(n) \ + (((n) + (CACHE_COHERENCE_PAD_REAL - 1)) & ~(CACHE_COHERENCE_PAD_REAL - 1)) + +////////////////////////////////////////////////////// +// Primary data structure +////////////////////////////////////////////////////// + +/* + * The implementation of the data abstraction used for lulesh + * resides entirely in the Domain class below. You can change + * grouping and interleaving of fields here to maximize data layout + * efficiency for your underlying architecture or compiler. + * + * For example, fields can be implemented as STL objects or + * raw array pointers. As another example, individual fields + * m_x, m_y, m_z could be budled into + * + * struct { Real_t x, y, z ; } *m_coord ; + * + * allowing accessor functions such as + * + * "Real_t &x(Index_t idx) { return m_coord[idx].x ; }" + * "Real_t &y(Index_t idx) { return m_coord[idx].y ; }" + * "Real_t &z(Index_t idx) { return m_coord[idx].z ; }" + */ + +class Domain +{ +public: + // Constructor + Domain(Int_t numRanks, Index_t colLoc, Index_t rowLoc, Index_t planeLoc, Index_t nx, + Int_t tp, Int_t nr, Int_t balance, Int_t cost); + + // + // ALLOCATION + // + + void AllocateNodePersistent(Int_t numNode) // Node-centered + { + m_coord.resize(numNode); // coordinates + + m_vel.resize(numNode); // velocities + + m_acc.resize(numNode); // accelerations + + m_force.resize(numNode); // forces + + m_nodalMass.resize(numNode); // mass + } + + void AllocateElemPersistent(Int_t numElem) // Elem-centered + { + m_nodelist.resize(8 * numElem); + + // elem connectivities through face + 
m_faceToElem.resize(numElem); + + m_elemBC.resize(numElem); + + m_e.resize(numElem); + + m_pq.resize(numElem); + + m_qlqq.resize(numElem); + + m_vol.resize(numElem); + + m_delv.resize(numElem); + m_vdov.resize(numElem); + + m_arealg.resize(numElem); + + m_ss.resize(numElem); + + m_elemMass.resize(numElem); + } + + void AllocateGradients(Int_t numElem, Int_t allElem) + { + // Position gradients + m_delx_xi.resize(numElem); + m_delx_eta.resize(numElem); + m_delx_zeta.resize(numElem); + + // Velocity gradients + m_delv_xi.resize(allElem); + m_delv_eta.resize(allElem); + m_delv_zeta.resize(allElem); + } + + void DeallocateGradients() + { + m_delx_zeta.clear(); + m_delx_eta.clear(); + m_delx_xi.clear(); + + m_delv_zeta.clear(); + m_delv_eta.clear(); + m_delv_xi.clear(); + } + + void AllocateStrains(Int_t numElem) + { + m_dxx.resize(numElem); + m_dyy.resize(numElem); + m_dzz.resize(numElem); + } + + void DeallocateStrains() + { + m_dzz.clear(); + m_dyy.clear(); + m_dxx.clear(); + } + + // + // ACCESSORS + // + + // Node-centered + + // Nodal coordinates + Real_t& x(Index_t idx) { return m_coord[idx].x; } + Real_t& y(Index_t idx) { return m_coord[idx].y; } + Real_t& z(Index_t idx) { return m_coord[idx].z; } + + // Nodal velocities + Real_t& xd(Index_t idx) { return m_vel[idx].x; } + Real_t& yd(Index_t idx) { return m_vel[idx].y; } + Real_t& zd(Index_t idx) { return m_vel[idx].z; } + + // Nodal accelerations + Real_t& xdd(Index_t idx) { return m_acc[idx].x; } + Real_t& ydd(Index_t idx) { return m_acc[idx].y; } + Real_t& zdd(Index_t idx) { return m_acc[idx].z; } + + // Nodal forces + Real_t& fx(Index_t idx) { return m_force[idx].x; } + Real_t& fy(Index_t idx) { return m_force[idx].y; } + Real_t& fz(Index_t idx) { return m_force[idx].z; } + + // Nodal mass + Real_t& nodalMass(Index_t idx) { return m_nodalMass[idx]; } + + // Nodes on symmertry planes + Index_t symmX(Index_t idx) { return m_symmX[idx]; } + Index_t symmY(Index_t idx) { return m_symmY[idx]; } + Index_t 
symmZ(Index_t idx) { return m_symmZ[idx]; } + bool symmXempty() { return m_symmX.empty(); } + bool symmYempty() { return m_symmY.empty(); } + bool symmZempty() { return m_symmZ.empty(); } + + // + // Element-centered + // + Index_t& regElemSize(Index_t idx) { return m_regElemSize[idx]; } + Index_t& regNumList(Index_t idx) { return m_regNumList[idx]; } + Index_t* regNumList() { return &m_regNumList[0]; } + Index_t* regElemlist(Int_t r) { return m_regElemlist[r]; } + Index_t& regElemlist(Int_t r, Index_t idx) { return m_regElemlist[r][idx]; } + + Index_t* nodelist(Index_t idx) { return &m_nodelist[Index_t(8) * idx]; } + + // elem connectivities through face + Index_t& lxim(Index_t idx) { return m_faceToElem[idx].lxim; } + Index_t& lxip(Index_t idx) { return m_faceToElem[idx].lxip; } + Index_t& letam(Index_t idx) { return m_faceToElem[idx].letam; } + Index_t& letap(Index_t idx) { return m_faceToElem[idx].letap; } + Index_t& lzetam(Index_t idx) { return m_faceToElem[idx].lzetam; } + Index_t& lzetap(Index_t idx) { return m_faceToElem[idx].lzetap; } + + // elem face symm/free-surface flag + Int_t& elemBC(Index_t idx) { return m_elemBC[idx]; } + + // Principal strains - temporary + Real_t& dxx(Index_t idx) { return m_dxx[idx]; } + Real_t& dyy(Index_t idx) { return m_dyy[idx]; } + Real_t& dzz(Index_t idx) { return m_dzz[idx]; } + + // Velocity gradient - temporary + Real_t& delv_xi(Index_t idx) { return m_delv_xi[idx]; } + Real_t& delv_eta(Index_t idx) { return m_delv_eta[idx]; } + Real_t& delv_zeta(Index_t idx) { return m_delv_zeta[idx]; } + + // Position gradient - temporary + Real_t& delx_xi(Index_t idx) { return m_delx_xi[idx]; } + Real_t& delx_eta(Index_t idx) { return m_delx_eta[idx]; } + Real_t& delx_zeta(Index_t idx) { return m_delx_zeta[idx]; } + + // Energy + Real_t& e(Index_t idx) { return m_e[idx]; } + + // Pressure + Real_t& p(Index_t idx) { return m_pq[idx].p; } + + // Artificial viscosity + Real_t& q(Index_t idx) { return m_pq[idx].q; } + + // Linear term 
for q + Real_t& ql(Index_t idx) { return m_qlqq[idx].ql; } + // Quadratic term for q + Real_t& qq(Index_t idx) { return m_qlqq[idx].qq; } + + Real_t& delv(Index_t idx) { return m_delv[idx]; } + + // Relative volume + Real_t& v(Index_t idx) { return m_vol[idx].v; } + // Reference volume + Real_t& volo(Index_t idx) { return m_vol[idx].volo; } + + // volume derivative over volume + Real_t& vdov(Index_t idx) { return m_vdov[idx]; } + + // Element characteristic length + Real_t& arealg(Index_t idx) { return m_arealg[idx]; } + + // Sound speed + Real_t& ss(Index_t idx) { return m_ss[idx]; } + + // Element mass + Real_t& elemMass(Index_t idx) { return m_elemMass[idx]; } + + Index_t nodeElemCount(Index_t idx) + { + return m_nodeElemStart[idx + 1] - m_nodeElemStart[idx]; + } + + Index_t* nodeElemCornerList(Index_t idx) + { + return &m_nodeElemCornerList[m_nodeElemStart[idx]]; + } + + // Parameters + + // Cutoffs + Real_t u_cut() const { return m_u_cut; } + Real_t e_cut() const { return m_e_cut; } + Real_t p_cut() const { return m_p_cut; } + Real_t q_cut() const { return m_q_cut; } + Real_t v_cut() const { return m_v_cut; } + + // Other constants (usually are settable via input file in real codes) + Real_t hgcoef() const { return m_hgcoef; } + Real_t qstop() const { return m_qstop; } + Real_t monoq_max_slope() const { return m_monoq_max_slope; } + Real_t monoq_limiter_mult() const { return m_monoq_limiter_mult; } + Real_t ss4o3() const { return m_ss4o3; } + Real_t qlc_monoq() const { return m_qlc_monoq; } + Real_t qqc_monoq() const { return m_qqc_monoq; } + Real_t qqc() const { return m_qqc; } + + Real_t eosvmax() const { return m_eosvmax; } + Real_t eosvmin() const { return m_eosvmin; } + Real_t pmin() const { return m_pmin; } + Real_t emin() const { return m_emin; } + Real_t dvovmax() const { return m_dvovmax; } + Real_t refdens() const { return m_refdens; } + + // Timestep controls, etc... 
+ Real_t& time() { return m_time; } + Real_t& deltatime() { return m_deltatime; } + Real_t& deltatimemultlb() { return m_deltatimemultlb; } + Real_t& deltatimemultub() { return m_deltatimemultub; } + Real_t& stoptime() { return m_stoptime; } + Real_t& dtcourant() { return m_dtcourant; } + Real_t& dthydro() { return m_dthydro; } + Real_t& dtmax() { return m_dtmax; } + Real_t& dtfixed() { return m_dtfixed; } + + Int_t& cycle() { return m_cycle; } + Index_t& numRanks() { return m_numRanks; } + + Index_t& colLoc() { return m_colLoc; } + Index_t& rowLoc() { return m_rowLoc; } + Index_t& planeLoc() { return m_planeLoc; } + Index_t& tp() { return m_tp; } + + Index_t& sizeX() { return m_sizeX; } + Index_t& sizeY() { return m_sizeY; } + Index_t& sizeZ() { return m_sizeZ; } + Index_t& numReg() { return m_numReg; } + Int_t& cost() { return m_cost; } + Index_t& numElem() { return m_numElem; } + Index_t& numNode() { return m_numNode; } + + Index_t& maxPlaneSize() { return m_maxPlaneSize; } + Index_t& maxEdgeSize() { return m_maxEdgeSize; } + + // + // MPI-Related additional data + // + +#if USE_MPI + // Communication Work space + Real_t* commDataSend; + Real_t* commDataRecv; + + // Maximum number of block neighbors + MPI_Request recvRequest[26]; // 6 faces + 12 edges + 8 corners + MPI_Request sendRequest[26]; // 6 faces + 12 edges + 8 corners +#endif + +private: + void BuildMesh(Int_t nx, Int_t edgeNodes, Int_t edgeElems); + void SetupThreadSupportStructures(); + void CreateRegionIndexSets(Int_t nreg, Int_t balance); + void SetupCommBuffers(Int_t edgeNodes); + void SetupSymmetryPlanes(Int_t edgeNodes); + void SetupElementConnectivities(Int_t edgeElems); + void SetupBoundaryConditions(Int_t edgeElems); + + // + // IMPLEMENTATION + // + + /* Node-centered */ + + struct Tuple3 + { + Real_t x, y, z; + }; + + Kokkos::View m_coord; /* coordinates */ + + Kokkos::View m_vel; /* velocities */ + + Kokkos::View m_acc; /* accelerations */ + + Kokkos::View m_force; /* forces */ + + 
Kokkos::View m_nodalMass; /* mass */ + + Kokkos::View m_symmX; /* symmetry plane nodesets */ + Kokkos::View m_symmY; + Kokkos::View m_symmZ; + + // Element-centered + + // Region information + Int_t m_numReg; + Int_t m_cost; // imbalance cost + Index_t* m_regElemSize; // Size of region sets + Index_t* m_regNumList; // Region number per domain element + Index_t** m_regElemlist; // region indexset + + Kokkos::View m_nodelist; /* elemToNode connectivity */ + + struct FaceElemConn + { + Index_t lxim, lxip, letam, letap, lzetam, lzetap; + }; + + Kokkos::View m_faceToElem; /* element conn across faces */ + + Kokkos::View m_elemBC; /* symmetry/free-surface flags for each elem face */ + + Kokkos::View m_dxx; /* principal strains -- temporary */ + Kokkos::View m_dyy; + Kokkos::View m_dzz; + + Kokkos::View m_delv_xi; /* velocity gradient -- temporary */ + Kokkos::View m_delv_eta; + Kokkos::View m_delv_zeta; + + Kokkos::View m_delx_xi; /* coordinate gradient -- temporary */ + Kokkos::View m_delx_eta; + Kokkos::View m_delx_zeta; + + Kokkos::View m_e; /* energy */ + + struct Pcomponents + { + Real_t p, q; + }; + + Kokkos::View m_pq; /* pressure and artificial viscosity */ + + struct Qcomponents + { + Real_t ql, qq; + }; + + Kokkos::View m_qlqq; /* linear and quadratic terms for q */ + + struct Volume + { + Real_t v, volo; + }; + + Kokkos::View m_vol; /* relative and reference volume */ + + Kokkos::View m_vnew; /* new relative volume -- temporary */ + Kokkos::View m_delv; /* m_vnew - m_v */ + Kokkos::View m_vdov; /* volume derivative over volume */ + + Kokkos::View m_arealg; /* characteristic length of an element */ + + Kokkos::View m_ss; /* "sound speed" */ + + Kokkos::View m_elemMass; /* mass */ + + // Cutoffs (treat as constants) + const Real_t m_e_cut; // energy tolerance + const Real_t m_p_cut; // pressure tolerance + const Real_t m_q_cut; // q tolerance + const Real_t m_v_cut; // relative volume tolerance + const Real_t m_u_cut; // velocity tolerance + + // Other constants 
(usually setable, but hardcoded in this proxy app) + + const Real_t m_hgcoef; // hourglass control + const Real_t m_ss4o3; + const Real_t m_qstop; // excessive q indicator + const Real_t m_monoq_max_slope; + const Real_t m_monoq_limiter_mult; + const Real_t m_qlc_monoq; // linear term coef for q + const Real_t m_qqc_monoq; // quadratic term coef for q + const Real_t m_qqc; + const Real_t m_eosvmax; + const Real_t m_eosvmin; + const Real_t m_pmin; // pressure floor + const Real_t m_emin; // energy floor + const Real_t m_dvovmax; // maximum allowable volume change + const Real_t m_refdens; // reference density + + // Variables to keep track of timestep, simulation time, and cycle + Real_t m_dtcourant; // courant constraint + Real_t m_dthydro; // volume change constraint + Int_t m_cycle; // iteration count for simulation + Real_t m_dtfixed; // fixed time increment + Real_t m_time; // current time + Real_t m_deltatime; // variable time increment + Real_t m_deltatimemultlb; + Real_t m_deltatimemultub; + Real_t m_dtmax; // maximum allowable time increment + Real_t m_stoptime; // end time for simulation + + Int_t m_numRanks; + + Index_t m_colLoc; + Index_t m_rowLoc; + Index_t m_planeLoc; + Index_t m_tp; + + Index_t m_sizeX; + Index_t m_sizeY; + Index_t m_sizeZ; + Index_t m_numElem; + Index_t m_numNode; + + Index_t m_maxPlaneSize; + Index_t m_maxEdgeSize; + + // OMP hack + Index_t* m_nodeElemStart; + Index_t* m_nodeElemCornerList; + + // Used in setup + Index_t m_rowMin, m_rowMax; + Index_t m_colMin, m_colMax; + Index_t m_planeMin, m_planeMax; +}; + +typedef Real_t& (Domain::*Domain_member)(Index_t); + +struct cmdLineOpts +{ + Int_t its; // -i + Int_t nx; // -s + Int_t numReg; // -r + Int_t numFiles; // -f + Int_t showProg; // -p + Int_t quiet; // -q + Int_t viz; // -v + Int_t cost; // -c + Int_t balance; // -b +}; + +// Function Prototypes + +// lulesh-par +Real_t +CalcElemVolume(const Real_t x[8], const Real_t y[8], const Real_t z[8]); + +// lulesh-util +void 
+ParseCommandLineOptions(int argc, char* argv[], Int_t myRank, struct cmdLineOpts* opts); +void +VerifyAndWriteFinalOutput(Real_t elapsed_time, Domain& locDom, Int_t nx, Int_t numRanks); + +// lulesh-viz +void +DumpToVisit(Domain& domain, int numFiles, int myRank, int numRanks); + +// lulesh-comm +void +CommRecv(Domain& domain, Int_t msgType, Index_t xferFields, Index_t dx, Index_t dy, + Index_t dz, bool doRecv, bool planeOnly); +void +CommSend(Domain& domain, Int_t msgType, Index_t xferFields, Domain_member* fieldData, + Index_t dx, Index_t dy, Index_t dz, bool doSend, bool planeOnly); +void +CommSBN(Domain& domain, Int_t xferFields, Domain_member* fieldData); +void +CommSyncPosVel(Domain& domain); +void +CommMonoQ(Domain& domain); + +// lulesh-init +void +InitMeshDecomp(Int_t numRanks, Int_t myRank, Int_t* col, Int_t* row, Int_t* plane, + Int_t* side); diff --git a/projects/rocprofiler-systems/examples/mpi/CMakeLists.txt b/projects/rocprofiler-systems/examples/mpi/CMakeLists.txt new file mode 100644 index 0000000000..24b1bf0208 --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/CMakeLists.txt @@ -0,0 +1,92 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-mpi-examples LANGUAGES C CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +find_package(MPI) +if(NOT MPI_FOUND) + if("${CMAKE_PROJECT_NAME}" STREQUAL "rocprofiler-systems" AND "$ENV{ROCPROFSYS_CI}") + set(_MSG_TYPE STATUS) # don't generate warnings during CI + else() + set(_MSG_TYPE AUTHOR_WARNING) + endif() + message( + ${_MSG_TYPE} + "MPI could not be found. 
Cannot build rocprofiler-systems-mpi target" + ) + return() +endif() + +find_package(Threads REQUIRED) + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") + +add_library(mpi-c-interface-library INTERFACE) +target_link_libraries( + mpi-c-interface-library + INTERFACE + Threads::Threads + MPI::MPI_C + $ +) +target_compile_options(mpi-c-interface-library INTERFACE -Wno-double-promotion) + +add_executable(mpi-allgather allgather.c) +target_link_libraries(mpi-allgather PRIVATE mpi-c-interface-library) + +add_executable(mpi-bcast bcast.c) +target_link_libraries(mpi-bcast PRIVATE mpi-c-interface-library) + +add_executable(mpi-all2all all2all.c) +target_link_libraries(mpi-all2all PRIVATE mpi-c-interface-library) + +add_executable(mpi-reduce reduce.c) +target_link_libraries(mpi-reduce PRIVATE mpi-c-interface-library) + +add_executable(mpi-scatter-gather scatter-gather.c) +target_link_libraries(mpi-scatter-gather PRIVATE mpi-c-interface-library) + +add_executable(mpi-send-recv send-recv.c) +target_link_libraries(mpi-send-recv PRIVATE mpi-c-interface-library) + +add_executable(mpi-allreduce allreduce.c) +target_link_libraries(mpi-allreduce PRIVATE mpi-c-interface-library m) + +set(CMAKE_BUILD_TYPE "Release") + +add_library(mpi-cxx-interface-library INTERFACE) +target_link_libraries( + mpi-cxx-interface-library + INTERFACE + Threads::Threads + MPI::MPI_CXX + $ +) + +add_executable(mpi-example mpi.cpp) +target_link_libraries(mpi-example PRIVATE mpi-cxx-interface-library) + +if(ROCPROFSYS_INSTALL_EXAMPLES) + install( + TARGETS + mpi-example + mpi-allgather + mpi-bcast + mpi-all2all + mpi-reduce + mpi-scatter-gather + mpi-send-recv + DESTINATION bin + COMPONENT rocprofiler-systems-examples + ) +endif() diff --git a/projects/rocprofiler-systems/examples/mpi/all2all.c b/projects/rocprofiler-systems/examples/mpi/all2all.c new file mode 100644 index 0000000000..ab698e02dd --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/all2all.c @@ -0,0 +1,244 @@ +// Author: Wes Kendall +// 
Copyright 2014 www.mpitutorial.com +// This code is provided freely with the tutorials on mpitutorial.com. Feel +// free to modify it for your own use. Any distribution of the code must +// either provide a link to www.mpitutorial.com or keep this header intact. +// +// A program that bins random numbers using MPI_Alltoallv. +// +#include +#include +#include +#include +#include + +// Creates an array of random numbers for binning. Note that the numbers are +// between [0, 1) +float* +create_random_numbers(int numbers_per_proc) +{ + float* random_numbers = (float*) malloc(sizeof(float) * numbers_per_proc); + int i; + for(i = 0; i < numbers_per_proc; i++) + { + int r = rand(); + // Make sure that the random number is never exactly one. + if(r == RAND_MAX) + { + r--; + } + random_numbers[i] = rand() / (float) (RAND_MAX); + } + return random_numbers; +} + +// Given a number, determine which process owns it. Since numbers are from [0, 1), +// simply multiple the number by the size of the MPI world to figure out which +// process owns it +int +which_process_owns_this_number(float rand_num, int world_size) +{ + return (int) (rand_num * world_size); +} + +// Gets the starting value for a process's bin +float +get_bin_start(int world_rank, int world_size) +{ + return (float) world_rank / world_size; +} + +// Gets the ending value for a process's bin +float +get_bin_end(int world_rank, int world_size) +{ + return get_bin_start(world_rank + 1, world_size); +} + +// This function returns the amount of numbers that will be sent to each +// process given the array of random numbers. 
+int* +get_send_amounts_per_proc(float* rand_nums, int numbers_per_proc, int world_size) +{ + int* send_amounts_per_proc = (int*) malloc(sizeof(int) * world_size); + // Initialize the amount of numbers per process to zero + memset(send_amounts_per_proc, 0, sizeof(int) * world_size); + + // For each random number, determine which process owns it and increment + // the amount of numbers for that process. + int i; + for(i = 0; i < numbers_per_proc; i++) + { + int owning_rank = which_process_owns_this_number(rand_nums[i], world_size); + send_amounts_per_proc[owning_rank]++; + } + + return send_amounts_per_proc; +} + +// Given how many numbers each process is sending to the other processes, find +// out how many numbers you are receiving from each process. This function +// returns an array of counts indexed on the rank of the process from which it +// will receive the numbers. +int* +get_recv_amounts_per_proc(int* send_amounts_per_proc, int world_size) +{ + int* recv_amounts_per_proc = (int*) malloc(sizeof(int) * world_size); + + // Perform an Alltoall for the send counts. This will send the send counts + // from each process and place them in the recv_amounts_per_proc array of + // the receiving processes to let them know how many numbers they will + // receive when binning occurs. + MPI_Alltoall(send_amounts_per_proc, 1, MPI_INT, recv_amounts_per_proc, 1, MPI_INT, + MPI_COMM_WORLD); + return recv_amounts_per_proc; +} + +// Given an array (of size "size") of counts, return the prefix sum of the +// counts. 
+int* +prefix_sum(const int* counts, int size) +{ + int* prefix_sum_result = (int*) malloc(sizeof(int) * size); + prefix_sum_result[0] = 0; + int i; + for(i = 1; i < size; i++) + { + prefix_sum_result[i] = prefix_sum_result[i - 1] + counts[i - 1]; + } + return prefix_sum_result; +} + +// Returns the sum of an array +int +sum(const int* arr, int size) +{ + int sum_result = 0; + int i; + for(i = 0; i < size; i++) + { + sum_result += arr[i]; + } + return sum_result; +} + +// Used for sorting floating point numbers +int +compare_float(const void* a, const void* b) +{ + if(*(float*) a < *(float*) b) + { + return -1; + } + else if(*(float*) a > *(float*) b) + { + return 1; + } + else + { + return 0; + } +} + +// Verifies that the binned numbers belong to the process. +void +verify_bin_nums(float* binned_nums, int num_count, int world_rank, int world_size) +{ + int i; + float bin_start = get_bin_start(world_rank, world_size); + float bin_end = get_bin_end(world_rank, world_size); + for(i = 0; i < num_count; i++) + { + if(binned_nums[i] >= bin_end || binned_nums[i] < bin_start) + { + fprintf( + stderr, + "Error: Binned number %f exceeds bin range [%f - %f) for process %d\n", + binned_nums[i], bin_start, bin_end, world_rank); + } + } +} + +int +main(int argc, char** argv) +{ + if(argc != 2) + { + fprintf(stderr, "Usage: bin numbers_per_proc\n"); + exit(1); + } + + // Get the amount of random numbers to create per process + int numbers_per_proc = atoi(argv[1]); + + MPI_Init(NULL, NULL); + + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + // Seed the random number generator to get different results each time + srand(time(NULL) * world_rank); + + // Create the random numbers on this process. 
Note that all numbers + // will be between 0 and 1 + float* rand_nums = create_random_numbers(numbers_per_proc); + + // Given the array of random numbers, determine how many will be sent + // to each process (based on the which process owns the number). + // The return value from this function is an array of counts + // for each rank in the communicator. + // The count represents how many numbers each process will receive + // when they are binned from this process. + int* send_amounts_per_proc = + get_send_amounts_per_proc(rand_nums, numbers_per_proc, world_size); + + // Determine how many numbers you will receive from each process. This + // information is needed to set up the binning call. + int* recv_amounts_per_proc = + get_recv_amounts_per_proc(send_amounts_per_proc, world_size); + + // Do a prefix sum for the send/recv amounts to get the send/recv offsets for + // the MPI_Alltoallv call (the binning call). + int* send_offsets_per_proc = prefix_sum(send_amounts_per_proc, world_size); + int* recv_offsets_per_proc = prefix_sum(recv_amounts_per_proc, world_size); + + // Allocate an array to hold the binned numbers for this process based on the total + // amount of numbers this process will receive from others. + int total_recv_amount = sum(recv_amounts_per_proc, world_size); + float* binned_nums = (float*) malloc(sizeof(float) * total_recv_amount); + + // The final step before binning - arrange all of the random numbers so that they + // are ordered by bin. For simplicity, we are simply going to sort the random + // numbers, however, this could be optimized since the numbers don't need to be + // fully sorted. + qsort(rand_nums, numbers_per_proc, sizeof(float), &compare_float); + + // Perform the binning step with MPI_Alltoallv. This will send all of the numbers in + // the rand_nums array to their proper bin. Each process will only contain numbers + // belonging to its bin after this step. 
For example, if there are 4 processes, + // process 0 will contain numbers in the [0, .25) range. + MPI_Alltoallv(rand_nums, send_amounts_per_proc, send_offsets_per_proc, MPI_FLOAT, + binned_nums, recv_amounts_per_proc, recv_offsets_per_proc, MPI_FLOAT, + MPI_COMM_WORLD); + + // Print results + printf("Process %d received %d numbers in bin [%f - %f)\n", world_rank, + total_recv_amount, get_bin_start(world_rank, world_size), + get_bin_end(world_rank, world_size)); + + // Check that the bin numbers are correct + verify_bin_nums(binned_nums, total_recv_amount, world_rank, world_size); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + + // Clean up + free(rand_nums); + free(send_amounts_per_proc); + free(recv_amounts_per_proc); + free(send_offsets_per_proc); + free(recv_offsets_per_proc); + free(binned_nums); +} diff --git a/projects/rocprofiler-systems/examples/mpi/allgather.c b/projects/rocprofiler-systems/examples/mpi/allgather.c new file mode 100644 index 0000000000..4400896918 --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/allgather.c @@ -0,0 +1,107 @@ +// Author: Wes Kendall +// Copyright 2012 www.mpitutorial.com +// This code is provided freely with the tutorials on mpitutorial.com. Feel +// free to modify it for your own use. Any distribution of the code must +// either provide a link to www.mpitutorial.com or keep this header intact. +// +// Program that computes the average of an array of elements in parallel using +// MPI_Scatter and MPI_Allgather +// +#include +#include +#include +#include +#include + +// Creates an array of random numbers. 
Each number has a value from 0 - 1 +float* +create_rand_nums(int num_elements) +{ + float* rand_nums = (float*) malloc(sizeof(float) * num_elements); + assert(rand_nums != NULL); + int i; + for(i = 0; i < num_elements; i++) + { + rand_nums[i] = (rand() / (float) RAND_MAX); + } + return rand_nums; +} + +// Computes the average of an array of numbers +float +compute_avg(float* array, int num_elements) +{ + float sum = 0.f; + int i; + for(i = 0; i < num_elements; i++) + { + sum += array[i]; + } + return sum / num_elements; +} + +int +main(int argc, char** argv) +{ + if(argc != 2) + { + fprintf(stderr, "Usage: avg num_elements_per_proc\n"); + exit(1); + } + + int num_elements_per_proc = atoi(argv[1]); + // Seed the random number generator to get different results each time + srand(time(NULL)); + + MPI_Init(NULL, NULL); + + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + // Create a random array of elements on the root process. 
Its total + // size will be the number of elements per process times the number + // of processes + float* rand_nums = NULL; + if(world_rank == 0) + { + rand_nums = create_rand_nums(num_elements_per_proc * world_size); + } + + // For each process, create a buffer that will hold a subset of the entire + // array + float* sub_rand_nums = (float*) malloc(sizeof(float) * num_elements_per_proc); + assert(sub_rand_nums != NULL); + + // Scatter the random numbers from the root process to all processes in + // the MPI world + MPI_Scatter(rand_nums, num_elements_per_proc, MPI_FLOAT, sub_rand_nums, + num_elements_per_proc, MPI_FLOAT, 0, MPI_COMM_WORLD); + + // Compute the average of your subset + float sub_avg = compute_avg(sub_rand_nums, num_elements_per_proc); + + // Gather all partial averages down to all the processes + float* sub_avgs = (float*) malloc(sizeof(float) * world_size); + assert(sub_avgs != NULL); + MPI_Allgather(&sub_avg, 1, MPI_FLOAT, sub_avgs, 1, MPI_FLOAT, MPI_COMM_WORLD); + + // Now that we have all of the partial averages, compute the + // total average of all numbers. Since we are assuming each process computed + // an average across an equal amount of elements, this computation will + // produce the correct answer. + float avg = compute_avg(sub_avgs, world_size); + printf("Avg of all elements from proc %d is %f\n", world_rank, avg); + + // Clean up + if(world_rank == 0) + { + free(rand_nums); + } + free(sub_avgs); + free(sub_rand_nums); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +} diff --git a/projects/rocprofiler-systems/examples/mpi/allreduce.c b/projects/rocprofiler-systems/examples/mpi/allreduce.c new file mode 100644 index 0000000000..ca252c2729 --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/allreduce.c @@ -0,0 +1,94 @@ +// Author: Wes Kendall +// Copyright 2013 www.mpitutorial.com +// This code is provided freely with the tutorials on mpitutorial.com. Feel +// free to modify it for your own use. 
Any distribution of the code must +// either provide a link to www.mpitutorial.com or keep this header intact. +// +// Program that computes the standard deviation of an array of elements in parallel using +// MPI_Reduce. +// +#include +#include +#include +#include +#include +#include + +// Creates an array of random numbers. Each number has a value from 0 - 1 +float* +create_rand_nums(int num_elements) +{ + float* rand_nums = (float*) malloc(sizeof(float) * num_elements); + assert(rand_nums != NULL); + int i; + for(i = 0; i < num_elements; i++) + { + rand_nums[i] = (rand() / (float) RAND_MAX); + } + return rand_nums; +} + +int +main(int argc, char** argv) +{ + if(argc != 2) + { + fprintf(stderr, "Usage: avg num_elements_per_proc\n"); + exit(1); + } + + int num_elements_per_proc = atoi(argv[1]); + + MPI_Init(NULL, NULL); + + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + // Create a random array of elements on all processes. 
+ srand(time(NULL) * + world_rank); // Seed the random number generator of processes uniquely + float* rand_nums = NULL; + rand_nums = create_rand_nums(num_elements_per_proc); + + // Sum the numbers locally + float local_sum = 0; + int i; + for(i = 0; i < num_elements_per_proc; i++) + { + local_sum += rand_nums[i]; + } + + // Reduce all of the local sums into the global sum in order to + // calculate the mean + float global_sum; + MPI_Allreduce(&local_sum, &global_sum, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); + float mean = global_sum / (num_elements_per_proc * world_size); + + // Compute the local sum of the squared differences from the mean + float local_sq_diff = 0; + for(i = 0; i < num_elements_per_proc; i++) + { + local_sq_diff += (rand_nums[i] - mean) * (rand_nums[i] - mean); + } + + // Reduce the global sum of the squared differences to the root process + // and print off the answer + float global_sq_diff; + MPI_Reduce(&local_sq_diff, &global_sq_diff, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + + // The standard deviation is the square root of the mean of the squared + // differences. + if(world_rank == 0) + { + float stddev = sqrt(global_sq_diff / (num_elements_per_proc * world_size)); + printf("Mean - %f, Standard deviation = %f\n", mean, stddev); + } + + // Clean up + free(rand_nums); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +} diff --git a/projects/rocprofiler-systems/examples/mpi/bcast.c b/projects/rocprofiler-systems/examples/mpi/bcast.c new file mode 100644 index 0000000000..038359b3cf --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/bcast.c @@ -0,0 +1,134 @@ +// Author: Wes Kendall +// Copyright 2011 www.mpitutorial.com +// This code is provided freely with the tutorials on mpitutorial.com. Feel +// free to modify it for your own use. Any distribution of the code must +// either provide a link to www.mpitutorial.com or keep this header intact. 
+//
+// Comparison of MPI_Bcast with the my_bcast function
+//
+#include <assert.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Naive broadcast built from blocking point-to-point calls: the root sends
+// `count` elements of `datatype` from `data` to every other rank of
+// `communicator`, one after another; every other rank receives them into `data`.
+void
+my_bcast(void* data, int count, MPI_Datatype datatype, int root, MPI_Comm communicator)
+{
+    int world_rank;
+    MPI_Comm_rank(communicator, &world_rank);
+    int world_size;
+    MPI_Comm_size(communicator, &world_size);
+
+    if(world_rank == root)
+    {
+        // If we are the root process, send our data to everyone
+        int i;
+        for(i = 0; i < world_size; i++)
+        {
+            if(i != world_rank)
+            {
+                MPI_Send(data, count, datatype, i, 0, communicator);
+            }
+        }
+    }
+    else
+    {
+        // If we are a receiver process, receive the data from the root
+        MPI_Recv(data, count, datatype, root, 0, communicator, MPI_STATUS_IGNORE);
+    }
+}
+
+// Same broadcast as my_bcast, but built from non-blocking calls. The root posts
+// one MPI_Isend per destination and then waits for all of them; every other rank
+// posts a single MPI_Irecv and waits for it. The function returns only after
+// the local operations have completed, so `data` may be reused afterwards.
+void
+my_ibcast(void* data, int count, MPI_Datatype datatype, int root, MPI_Comm communicator)
+{
+    int world_rank;
+    MPI_Comm_rank(communicator, &world_rank);
+    int world_size;
+    MPI_Comm_size(communicator, &world_size);
+
+    if(world_rank == root)
+    {
+        // Every MPI_Isend needs its own request handle. Sharing one handle would
+        // overwrite it on each iteration, so only the last send would ever be
+        // waited on and the remaining requests would leak.
+        MPI_Request* requests     = NULL;
+        int          num_requests = 0;
+        if(world_size > 1)
+        {
+            requests = (MPI_Request*) malloc(sizeof(MPI_Request) * (world_size - 1));
+            assert(requests != NULL);
+        }
+
+        // If we are the root process, send our data to everyone
+        int i;
+        for(i = 0; i < world_size; i++)
+        {
+            if(i != world_rank)
+            {
+                MPI_Isend(data, count, datatype, i, 0, communicator,
+                          &requests[num_requests]);
+                num_requests++;
+            }
+        }
+
+        // blocks until every posted send has completed
+        if(num_requests > 0)
+        {
+            MPI_Waitall(num_requests, requests, MPI_STATUSES_IGNORE);
+        }
+        free(requests);
+    }
+    else
+    {
+        // If we are a receiver process, receive the data from the root
+        MPI_Request request = MPI_REQUEST_NULL;
+        MPI_Irecv(data, count, datatype, root, 0, communicator, &request);
+        // blocks until the data from the root has arrived
+        MPI_Wait(&request, MPI_STATUS_IGNORE);
+    }
+}
+
+// Times my_bcast, my_ibcast and MPI_Bcast over num_trials rounds, with barriers
+// around each timed call, and prints the average time per call on rank 0.
+// Optional arguments: [num_elements] [num_trials] (defaults 30 and 50).
+int
+main(int argc, char** argv)
+{
+    int num_elements = 30;
+    int num_trials   = 50;
+
+    if(argc != 3) fprintf(stderr, "Usage: compare_bcast [num_elements] [num_trials]\n");
+
+    if(argc > 1) num_elements = atoi(argv[1]);
+    if(argc > 2) num_trials = atoi(argv[2]);
+
+    // atoi() yields 0 for non-numeric input. Non-positive values would request a
+    // zero-sized (or huge) buffer and divide the averages below by zero.
+    if(num_elements <= 0 || num_trials <= 0)
+    {
+        fprintf(stderr, "num_elements and num_trials must be positive integers\n");
+        exit(1);
+    }
+
+    MPI_Init(NULL, NULL);
+
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+    double total_my_bcast_time  = 0.0;
+    double total_my_ibcast_time = 0.0;
+    double total_mpi_bcast_time = 0.0;
+    int    i;
+    int*   data = (int*) malloc(sizeof(int) * num_elements);
+    assert(data != NULL);
+
+    for(i = 0; i < num_trials; i++)
+    {
+        // Time my_bcast
+        // Synchronize before starting timing
+        MPI_Barrier(MPI_COMM_WORLD);
+        total_my_bcast_time -= MPI_Wtime();
+        my_bcast(data, num_elements, MPI_INT, 0, MPI_COMM_WORLD);
+        // Synchronize again before obtaining final time
+        MPI_Barrier(MPI_COMM_WORLD);
+        total_my_bcast_time += MPI_Wtime();
+
+        // Time my_ibcast
+        MPI_Barrier(MPI_COMM_WORLD);
+        total_my_ibcast_time -= MPI_Wtime();
+        my_ibcast(data, num_elements, MPI_INT, 0, MPI_COMM_WORLD);
+        // Synchronize again before obtaining final time
+        MPI_Barrier(MPI_COMM_WORLD);
+        total_my_ibcast_time += MPI_Wtime();
+
+        // Time MPI_Bcast
+        MPI_Barrier(MPI_COMM_WORLD);
+        total_mpi_bcast_time -= MPI_Wtime();
+        MPI_Bcast(data, num_elements, MPI_INT, 0, MPI_COMM_WORLD);
+        MPI_Barrier(MPI_COMM_WORLD);
+        total_mpi_bcast_time += MPI_Wtime();
+    }
+
+    // Print off timing information
+    if(world_rank == 0)
+    {
+        printf("Data size = %d, Trials = %d\n", num_elements * (int) sizeof(int),
+               num_trials);
+        printf("Avg my_bcast time = %lf\n", total_my_bcast_time / num_trials);
+        printf("Avg my_ibcast time = %lf\n", total_my_ibcast_time / num_trials);
+        printf("Avg MPI_Bcast time = %lf\n", total_mpi_bcast_time / num_trials);
+    }
+
+    free(data);
+    MPI_Finalize();
+}
diff --git a/projects/rocprofiler-systems/examples/mpi/mpi.cpp b/projects/rocprofiler-systems/examples/mpi/mpi.cpp
new file mode 100644
index 0000000000..67a2d28a3c
--- /dev/null
+++ b/projects/rocprofiler-systems/examples/mpi/mpi.cpp
@@ -0,0 +1,350 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +auto _name = std::string{}; +} // namespace + +template +auto +get_values_str(const Tp& _data) +{ + std::stringstream _ss{}; + for(auto&& itr : _data) + _ss << ", " << std::setw(6) << std::setprecision(2) << std::fixed << itr; + return _ss.str().substr(1); +} + +template +auto +get_dist(std::mt19937_64& _mt) +{ + static auto _dist = []() { + if constexpr(std::is_integral::value) + { + return std::uniform_int_distribution(1, N * N); + } + else + { + return std::uniform_real_distribution(1.0, N * N); + } + }(); + return _dist(_mt); +} + +template +auto +get_dtype() +{ + auto _dtype = MPI_INT; // NOLINT + if(std::is_same::value) + _dtype = MPI_LONG; + else if(std::is_same::value) + _dtype = MPI_FLOAT; + else if(std::is_same::value) + _dtype = MPI_DOUBLE; + return _dtype; +} + +template +void +all2all(int _rank, MPI_Comm _comm) +{ + if(_comm == MPI_COMM_NULL) return; + static_assert(N > 0, "Error! N must be greater than zero!"); + + auto _dtype = get_dtype(); + auto _mt = std::mt19937_64{ size_t(_rank + 100) }; + auto values_sent = std::array{}; + auto values_recv = std::array{}; + for(size_t i = 0; i < N; ++i) + values_sent[i] = get_dist(_mt); + + if(_rank == 0) + printf("[%s][%s][%2i] values sent (# = %zu) :: %s.\n", _name.c_str(), + __FUNCTION__, _rank, values_sent.size(), + get_values_str(values_sent).c_str()); + + MPI_Alltoall(&values_sent[_rank], 1, _dtype, &values_recv[_rank], 1, _dtype, _comm); + + if(_rank == 0) + printf("[%s][%s][%2i] values recv (# = %zu) :: %s.\n", _name.c_str(), + __FUNCTION__, _rank, values_sent.size(), + get_values_str(values_recv).c_str()); +} + +template +void +send_recv(int _rank, MPI_Comm _comm) +{ + if(_comm == MPI_COMM_NULL) return; + static_assert(N > 0, "Error! 
N must be greater than zero!"); + int _size = 0; + MPI_Comm_size(_comm, &_size); + + auto _dtype = get_dtype(); + auto _mt = std::mt19937_64{ size_t(_rank + 100) }; + auto values_sent = std::array{}; + auto values_recv = std::array{}; + for(size_t i = 0; i < N; ++i) + values_sent[i] = get_dist(_mt); + + if(_rank == 0 || _rank == _size - 1) + printf("[%s][%s][%2i] values sent (# = %zu) :: %s.\n", _name.c_str(), + __FUNCTION__, _rank, values_sent.size(), + get_values_str(values_sent).c_str()); + + for(int i = 0; i < _size; ++i) + { + if(i != _rank) MPI_Send(&values_sent[_rank], 1, _dtype, i, N, _comm); + } + + for(int i = 0; i < _size; ++i) + { + if(i != _rank) + { + MPI_Status _status; + MPI_Recv(&values_recv[i], 1, _dtype, i, N, _comm, &_status); + } + } + + if(_rank == 0 || _rank == _size - 1) + printf("[%s][%s][%2i] values recv (# = %zu) :: %s.\n", _name.c_str(), + __FUNCTION__, _rank, values_sent.size(), + get_values_str(values_recv).c_str()); +} + +void +run(MPI_Comm _comm, int nitr) +{ + if(_comm == MPI_COMM_NULL) return; + int _rank = 0; + int _size = 0; + MPI_Comm_rank(_comm, &_rank); + MPI_Comm_size(_comm, &_size); + + printf("[%s][%s][%2i] running %i iterations on %i ranks... \n", _name.c_str(), + __FUNCTION__, _rank, nitr, _size); + + MPI_Barrier(_comm); + for(int i = 0; i < nitr; ++i) + { + send_recv(_rank, _comm); + send_recv(_rank, _comm); + send_recv(_rank, _comm); + send_recv(_rank, _comm); + MPI_Barrier(_comm); + all2all(_rank, _comm); + all2all(_rank, _comm); + all2all(_rank, _comm); + all2all(_rank, _comm); + } + MPI_Barrier(_comm); + + printf("[%s][%s][%2i] running %i iterations on %i ranks... 
Done\n", _name.c_str(), + __FUNCTION__, _rank, nitr, _size); +} + +void +print_info(MPI_Comm _comm, bool _verbose, std::string _msg = {}) +{ + if(_comm == MPI_COMM_NULL) return; + int _rank = 0; + int _size = 1; + MPI_Comm_rank(_comm, &_rank); + MPI_Comm_size(_comm, &_size); + + if(!_msg.empty()) _msg = "[" + _msg + "] "; + + if(_verbose) + { + auto _ppid = getppid(); + std::ifstream _ifs{ "/proc/" + std::to_string(_ppid) + "/task/" + + std::to_string(_ppid) + "/children" }; + std::stringstream _ss{}; + while(_ifs) + { + std::string _s{}; + _ifs >> _s; + _ss << _s << " "; + } + if(_rank == 0) + printf("[%s]%s RANK = %i (out of %i), PID = %i, PPID = %i :: %s\n", + _name.c_str(), _msg.c_str(), _rank, _size, getpid(), getppid(), + _ss.str().c_str()); + } + else + { + if(_rank == 0) + printf("[%s]%s RANK = %i (out of %i), PID = %i, PPID = %i\n", _name.c_str(), + _msg.c_str(), _rank, _size, getpid(), getppid()); + } +} + +void +run_main(int argc, char** argv) +{ + int rank = 0; + int size = 1; + int nitr = 1; + + if(argc > 1) nitr = atoi(argv[1]); + + MPI_Comm_size(MPI_COMM_WORLD, &size); + + _name = argv[0]; + auto _pos = _name.find_last_of('/'); + if(_pos < _name.length()) _name = _name.substr(_pos + 1); + + printf("[%s] Number of iterations: %i\n", _name.c_str(), nitr); + + printf("[%s][%2i] running with MPI_COMM_WORLD...\n", _name.c_str(), getpid()); + run(MPI_COMM_WORLD, nitr); + + print_info(MPI_COMM_WORLD, true, "MPI_COMM_WORLD"); + + if(size > 1) + { + MPI_Comm dup; + printf("[%s][%2i] Duplicating MPI_COMM_WORLD...\n", _name.c_str(), getpid()); + MPI_Comm_dup(MPI_COMM_WORLD, &dup); + + printf("[%s][%2i] running with duplicated comm of MPI_COMM_WORLD...\n", + _name.c_str(), getpid()); + run(dup, nitr); + + MPI_Comm_rank(dup, &rank); + if(rank == 0) printf("[%s]\n", _name.c_str()); + printf("[%s][%2i] RANK = %i on duplicated MPI_COMM_WORLD...\n", _name.c_str(), + getpid(), rank); + + if(size > 3) + { + std::vector comms(3); + for(int i = 0; i < size; ++i) + { + 
auto _idx = i % 3; + printf( + "[%s][%2i] Splitting duplicated MPI_COMM_WORLD %i (rank = %i)...\n", + _name.c_str(), getpid(), _idx, rank); + MPI_Comm* comm = &comms.at(_idx); + MPI_Comm_split(dup, _idx, rank, comm); + } + + for(auto itr : comms) // NOLINT + MPI_Barrier(itr); + + for(int i = 0; i < size; ++i) + { + auto _idx = i % 3; + int _rank = 0; + MPI_Comm_rank(comms.at(_idx), &_rank); + printf("[%s][%2i] Running on split communicator %i (rank = %i)...\n", + _name.c_str(), getpid(), _idx, _rank); + run(comms.at(_idx), nitr); + } + + // Get the group of processes in MPI_COMM_WORLD + MPI_Group world_group; + MPI_Comm_group(MPI_COMM_WORLD, &world_group); + + int n = 0; + const int ranks[7] = { 1, 2, 3, 5, 7, 11, 13 }; + for(int r : ranks) + if(r < size) ++n; + + // Construct a group containing all of the prime ranks in world_group + MPI_Group prime_group; + MPI_Group_incl(world_group, n, ranks, &prime_group); + + // Create a new communicator based on the group + MPI_Comm prime_comm; + MPI_Comm_create_group(MPI_COMM_WORLD, prime_group, 0, &prime_comm); + + MPI_Group nonprime_group; + MPI_Group_difference(world_group, prime_group, &nonprime_group); + + MPI_Comm nonprime_comm; + MPI_Comm_create_group(MPI_COMM_WORLD, nonprime_group, 1, &nonprime_comm); + + print_info(prime_comm, false, "Prime comm"); + print_info(nonprime_comm, false, "Non-prime comm"); + + run(prime_comm, nitr); + run(nonprime_comm, nitr); + + MPI_Group_free(&world_group); + MPI_Group_free(&prime_group); + MPI_Group_free(&nonprime_group); + } + + print_info(dup, false); + } + + printf("[%s][%i of %i] %s... 
Done\n", _name.c_str(), rank, size, __FUNCTION__); +} + +int +main(int argc, char** argv) +{ + std::this_thread::sleep_for(std::chrono::seconds{ 2 }); + int _mpi_thread_requested = MPI_THREAD_SERIALIZED; + int _mpi_thread_provided = 0; + MPI_Init_thread(&argc, &argv, _mpi_thread_requested, &_mpi_thread_provided); + + if(_mpi_thread_provided != _mpi_thread_requested) + throw std::runtime_error("Error! requested thread mode != provided thread mode"); + + auto _prom = std::promise{}; + auto _fut = _prom.get_future(); + + std::thread{ [&]() { + _prom.set_value_at_thread_exit(); + run_main(argc, argv); + } }.join(); + + _fut.wait(); + + MPI_Finalize(); + return EXIT_SUCCESS; +} diff --git a/projects/rocprofiler-systems/examples/mpi/reduce.c b/projects/rocprofiler-systems/examples/mpi/reduce.c new file mode 100644 index 0000000000..6d82cc9be8 --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/reduce.c @@ -0,0 +1,82 @@ +// Author: Wes Kendall +// Copyright 2013 www.mpitutorial.com +// This code is provided freely with the tutorials on mpitutorial.com. Feel +// free to modify it for your own use. Any distribution of the code must +// either provide a link to www.mpitutorial.com or keep this header intact. +// +// Program that computes the average of an array of elements in parallel using +// MPI_Reduce. +// +#include +#include +#include +#include +#include + +// Creates an array of random numbers. 
Each number has a value from 0 - 1
+//
+// Returns a heap buffer holding num_elements floats; the caller owns it and must
+// free() it. A failed allocation is treated as fatal via assert().
+float*
+create_rand_nums(int num_elements)
+{
+    float* rand_nums = (float*) malloc(sizeof(float) * num_elements);
+    assert(rand_nums != NULL);
+    int i;
+    for(i = 0; i < num_elements; i++)
+    {
+        rand_nums[i] = (rand() / (float) RAND_MAX);
+    }
+    return rand_nums;
+}
+
+// Each rank generates num_elements_per_proc random values, prints its local sum
+// and average, and the local sums are combined on rank 0 with MPI_Reduce. Rank 0
+// prints the total sum and the overall average.
+int
+main(int argc, char** argv)
+{
+    if(argc != 2)
+    {
+        fprintf(stderr, "Usage: avg num_elements_per_proc\n");
+        exit(1);
+    }
+
+    int num_elements_per_proc = atoi(argv[1]);
+    // atoi() yields 0 for non-numeric input. A non-positive count would request a
+    // zero-sized (or huge) allocation and make the divisor of the averages zero.
+    if(num_elements_per_proc <= 0)
+    {
+        fprintf(stderr, "num_elements_per_proc must be a positive integer\n");
+        exit(1);
+    }
+
+    MPI_Init(NULL, NULL);
+
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    int world_size;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+    // Create a random array of elements on all processes.
+    // Seed the random number generator to get different results each time for
+    // each processor. The rank is added (not multiplied) so that rank 0 does not
+    // always get the fixed seed 0.
+    srand((unsigned int) time(NULL) + (unsigned int) world_rank);
+    float* rand_nums = NULL;
+    rand_nums        = create_rand_nums(num_elements_per_proc);
+
+    // Sum the numbers locally
+    float local_sum = 0;
+    int   i;
+    for(i = 0; i < num_elements_per_proc; i++)
+    {
+        local_sum += rand_nums[i];
+    }
+
+    // Print the local sum and average on each process
+    printf("Local sum for process %d - %f, avg = %f\n", world_rank, local_sum,
+           local_sum / num_elements_per_proc);
+
+    // Reduce all of the local sums into the global sum
+    float global_sum;
+    MPI_Reduce(&local_sum, &global_sum, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+    // Print the result
+    if(world_rank == 0)
+    {
+        printf("Total sum = %f, avg = %f\n", global_sum,
+               global_sum / (world_size * num_elements_per_proc));
+    }
+
+    // Clean up
+    free(rand_nums);
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+}
diff --git a/projects/rocprofiler-systems/examples/mpi/scatter-gather.c b/projects/rocprofiler-systems/examples/mpi/scatter-gather.c
new file mode 100644
index 0000000000..c9af42cb53
--- /dev/null
+++ b/projects/rocprofiler-systems/examples/mpi/scatter-gather.c
@@ -0,0 +1,118 @@
+// Author: Wes Kendall
+// Copyright 2012 www.mpitutorial.com
+// This code is provided freely with
the tutorials on mpitutorial.com. Feel
+// free to modify it for your own use. Any distribution of the code must
+// either provide a link to www.mpitutorial.com or keep this header intact.
+//
+// Program that computes the average of an array of elements in parallel using
+// MPI_Scatter and MPI_Gather
+//
+#include <assert.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+// Creates an array of random numbers. Each number has a value from 0 - 1
+//
+// Returns a heap buffer holding num_elements floats; the caller owns it and must
+// free() it. A failed allocation is treated as fatal via assert().
+float*
+create_rand_nums(int num_elements)
+{
+    float* rand_nums = (float*) malloc(sizeof(float) * num_elements);
+    assert(rand_nums != NULL);
+    int i;
+    for(i = 0; i < num_elements; i++)
+    {
+        rand_nums[i] = (rand() / (float) RAND_MAX);
+    }
+    return rand_nums;
+}
+
+// Computes the average of an array of numbers
+//
+// The array is only read. num_elements is the divisor, so it must not be zero.
+float
+compute_avg(const float* array, int num_elements)
+{
+    float sum = 0.f;
+    int   i;
+    for(i = 0; i < num_elements; i++)
+    {
+        sum += array[i];
+    }
+    return sum / num_elements;
+}
+
+// Rank 0 creates num_elements_per_proc * world_size random values and scatters
+// them in equal slices. Every rank averages its slice, the partial averages are
+// gathered on rank 0, and rank 0 prints the average of the partial averages next
+// to the average computed directly over the original data.
+int
+main(int argc, char** argv)
+{
+    if(argc != 2)
+    {
+        fprintf(stderr, "Usage: avg num_elements_per_proc\n");
+        exit(1);
+    }
+
+    int num_elements_per_proc = atoi(argv[1]);
+    // atoi() yields 0 for non-numeric input. A non-positive count would request
+    // zero-sized (or huge) buffers and make compute_avg divide by zero.
+    if(num_elements_per_proc <= 0)
+    {
+        fprintf(stderr, "num_elements_per_proc must be a positive integer\n");
+        exit(1);
+    }
+    // Seed the random number generator to get different results each time
+    srand(time(NULL));
+
+    MPI_Init(NULL, NULL);
+
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    int world_size;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+    // Create a random array of elements on the root process. Its total
+    // size will be the number of elements per process times the number
+    // of processes
+    float* rand_nums = NULL;
+    if(world_rank == 0)
+    {
+        rand_nums = create_rand_nums(num_elements_per_proc * world_size);
+    }
+
+    // For each process, create a buffer that will hold a subset of the entire
+    // array
+    float* sub_rand_nums = (float*) malloc(sizeof(float) * num_elements_per_proc);
+    assert(sub_rand_nums != NULL);
+
+    // Scatter the random numbers from the root process to all processes in
+    // the MPI world
+    MPI_Scatter(rand_nums, num_elements_per_proc, MPI_FLOAT, sub_rand_nums,
+                num_elements_per_proc, MPI_FLOAT, 0, MPI_COMM_WORLD);
+
+    // Compute the average of your subset
+    float sub_avg = compute_avg(sub_rand_nums, num_elements_per_proc);
+
+    // Gather all partial averages down to the root process
+    float* sub_avgs = NULL;
+    if(world_rank == 0)
+    {
+        sub_avgs = (float*) malloc(sizeof(float) * world_size);
+        assert(sub_avgs != NULL);
+    }
+    MPI_Gather(&sub_avg, 1, MPI_FLOAT, sub_avgs, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+
+    // Now that we have all of the partial averages on the root, compute the
+    // total average of all numbers. Since we are assuming each process computed
+    // an average across an equal amount of elements, this computation will
+    // produce the correct answer.
+    if(world_rank == 0)
+    {
+        float avg = compute_avg(sub_avgs, world_size);
+        printf("Avg of all elements is %f\n", avg);
+        // Compute the average across the original data for comparison
+        float original_data_avg =
+            compute_avg(rand_nums, num_elements_per_proc * world_size);
+        printf("Avg computed across original data is %f\n", original_data_avg);
+    }
+
+    // Clean up
+    if(world_rank == 0)
+    {
+        free(rand_nums);
+        free(sub_avgs);
+    }
+    free(sub_rand_nums);
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+}
diff --git a/projects/rocprofiler-systems/examples/mpi/send-recv.c b/projects/rocprofiler-systems/examples/mpi/send-recv.c
new file mode 100644
index 0000000000..ee43a07183
--- /dev/null
+++ b/projects/rocprofiler-systems/examples/mpi/send-recv.c
@@ -0,0 +1,55 @@
+// Author: Wes Kendall
+// Copyright 2011 www.mpitutorial.com
+// This code is provided freely with the tutorials on mpitutorial.com. Feel
+// free to modify it for your own use. Any distribution of the code must
+// either provide a link to www.mpitutorial.com or keep this header intact.
+//
+// Ping pong example with MPI_Send and MPI_Recv. Two processes ping pong a
+// number back and forth, incrementing it until it reaches a given value.
+//
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Two ranks pass ping_pong_count back and forth. The rank whose number equals
+// the parity of the counter increments it and sends it to its partner; the
+// other rank receives it. Both stop once the counter reaches PING_PONG_LIMIT.
+int
+main(int argc, char** argv)
+{
+    const int PING_PONG_LIMIT = 10;
+
+    // Initialize the MPI environment
+    MPI_Init(NULL, NULL);
+    // Find out rank, size
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    int world_size;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+    // We are assuming 2 processes for this task
+    if(world_size != 2)
+    {
+        fprintf(stderr, "World size must be two for %s, not %i\n", argv[0], world_size);
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    int ping_pong_count = 0;
+    // With exactly two ranks the partner is simply "the other one"
+    int partner_rank = (world_rank + 1) % 2;
+    while(ping_pong_count < PING_PONG_LIMIT)
+    {
+        // Even counts are sent by rank 0, odd counts by rank 1
+        if(world_rank == ping_pong_count % 2)
+        {
+            // Increment the ping pong count before you send it
+            ping_pong_count++;
+            MPI_Send(&ping_pong_count, 1, MPI_INT, partner_rank, 0, MPI_COMM_WORLD);
+            printf("%d sent and incremented ping_pong_count %d to %d\n", world_rank,
+                   ping_pong_count, partner_rank);
+        }
+        else
+        {
+            MPI_Recv(&ping_pong_count, 1, MPI_INT, partner_rank, 0, MPI_COMM_WORLD,
+                     MPI_STATUS_IGNORE);
+            printf("%d received ping_pong_count %d from %d\n", world_rank,
+                   ping_pong_count, partner_rank);
+        }
+    }
+    MPI_Finalize();
+}
diff --git a/projects/rocprofiler-systems/examples/openmp/CG/cg.cpp b/projects/rocprofiler-systems/examples/openmp/CG/cg.cpp
new file mode 100644
index 0000000000..24f1b436ff
--- /dev/null
+++ b/projects/rocprofiler-systems/examples/openmp/CG/cg.cpp
@@ -0,0 +1,1056 @@
+/*
+MIT License
+
+Copyright (c) 2021 Parallel Applications Modelling Group - GMAP
+    GMAP website: https://gmap.pucrs.br
+
+    Pontifical Catholic University of Rio Grande do Sul (PUCRS)
+    Av.
Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +Authors of the Fortran code: + M. Yarrow + C. Kuszmaul + H. 
Jin + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff + +------------------------------------------------------------------------------ + +The OpenMP version is a parallel implementation of the serial C++ version +OpenMP version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-OMP + +Authors of the OpenMP code: + Júnior Löff + +*/ + +#include "../common/npb-CPP.hpp" +#include "npbparams.hpp" + +/* + * --------------------------------------------------------------------- + * note: please observe that in the routine conj_grad three + * implementations of the sparse matrix-vector multiply have + * been supplied. the default matrix-vector multiply is not + * loop unrolled. the alternate implementations are unrolled + * to a depth of 2 and unrolled to a depth of 8. please + * experiment with these to find the fastest for your particular + * architecture. if reporting timing results, any of these three may + * be used without penalty. + * --------------------------------------------------------------------- + * class specific parameters: + * it appears here for reference only. + * these are their values, however, this info is imported in the npbparams.h + * include file, which is written by the sys/setparams.c program. 
+ * --------------------------------------------------------------------- + */ +#define NZ (NA * (NONZER + 1) * (NONZER + 1)) +#define NAZ (NA * (NONZER + 1)) +#define T_INIT 0 +#define T_BENCH 1 +#define T_CONJ_GRAD 2 +#define T_LAST 3 + +/* global variables */ +#if defined(DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION) +static int colidx[NZ]; +static int rowstr[NA + 1]; +static int iv[NA]; +static int arow[NA]; +static int acol[NAZ]; +static double aelt[NAZ]; +static double a[NZ]; +static double x[NA + 2]; +static double z[NA + 2]; +static double p[NA + 2]; +static double q[NA + 2]; +static double r[NA + 2]; +#else +static int(*colidx) = (int*) malloc(sizeof(int) * (NZ)); +static int(*rowstr) = (int*) malloc(sizeof(int) * (NA + 1)); +static int(*iv) = (int*) malloc(sizeof(int) * (NA)); +static int(*arow) = (int*) malloc(sizeof(int) * (NA)); +static int(*acol) = (int*) malloc(sizeof(int) * (NAZ)); +static double(*aelt) = (double*) malloc(sizeof(double) * (NAZ)); +static double(*a) = (double*) malloc(sizeof(double) * (NZ)); +static double(*x) = (double*) malloc(sizeof(double) * (NA + 2)); +static double(*z) = (double*) malloc(sizeof(double) * (NA + 2)); +static double(*p) = (double*) malloc(sizeof(double) * (NA + 2)); +static double(*q) = (double*) malloc(sizeof(double) * (NA + 2)); +static double(*r) = (double*) malloc(sizeof(double) * (NA + 2)); +#endif +static int naa; +static int nzz; +static int firstrow; +static int lastrow; +static int firstcol; +static int lastcol; +static double amult; +static double tran; +static boolean timeron; + +/* function prototypes */ +static void +conj_grad(const int colidx[], const int rowstr[], const double x[], double z[], + const double a[], double p[], double q[], double r[], double* rnorm); +static int +icnvrt(double x, int ipwr2); +static void +makea(int n, int nz, double a[], int colidx[], int rowstr[], int firstrow, int lastrow, + int firstcol, int lastcol, int arow[], int acol[][NONZER + 1], + double 
aelt[][NONZER + 1], int iv[]); +static void +sparse(double a[], int colidx[], int rowstr[], int n, int nz, int nozer, const int arow[], + int acol[][NONZER + 1], double aelt[][NONZER + 1], int firstrow, int lastrow, + int nzloc[], double rcond, double shift); +static void +sprnvc(int n, int nz, int nn1, double v[], int iv[]); +static void +vecset(int n, double v[], int iv[], int* nzv, int i, double val); + +/* cg */ +int +main(int /*argc*/, char** /*argv*/) +{ +#if defined(DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION) + printf( + " DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION mode on\n"); +#endif + int i, j, k, it; + double zeta; + double rnorm; + double norm_temp1, norm_temp2; + double t, mflops, tmax; + char class_npb; + boolean verified; + double zeta_verify_value = 0.0; + double epsilon = 0.0; + double err = 0.0; + + char* t_names[T_LAST]; + + for(i = 0; i < T_LAST; i++) + { + timer_clear(i); + } + + FILE* fp; + if((fp = fopen("timer.flag", "r")) != nullptr) + { + timeron = TRUE; + t_names[T_INIT] = (char*) "init"; + t_names[T_BENCH] = (char*) "benchmk"; + t_names[T_CONJ_GRAD] = (char*) "conjgd"; + fclose(fp); + } + else + { + timeron = FALSE; + } + + timer_start(T_INIT); + + firstrow = 0; + lastrow = NA - 1; + firstcol = 0; + lastcol = NA - 1; + + if(NA == 1400 && NONZER == 7 && NITER == 15 && SHIFT == 10.0) + { + class_npb = 'S'; + zeta_verify_value = 8.5971775078648; + } + else if(NA == 7000 && NONZER == 8 && NITER == 15 && SHIFT == 12.0) + { + class_npb = 'W'; + zeta_verify_value = 10.362595087124; + } + else if(NA == 14000 && NONZER == 11 && NITER == 15 && SHIFT == 20.0) + { + class_npb = 'A'; + zeta_verify_value = 17.130235054029; + } + else if(NA == 75000 && NONZER == 13 && NITER == 75 && SHIFT == 60.0) + { + class_npb = 'B'; + zeta_verify_value = 22.712745482631; + } + else if(NA == 150000 && NONZER == 15 && NITER == 75 && SHIFT == 110.0) + { + class_npb = 'C'; + zeta_verify_value = 28.973605592845; + } + else 
if(NA == 1500000 && NONZER == 21 && NITER == 100 && SHIFT == 500.0) + { + class_npb = 'D'; + zeta_verify_value = 52.514532105794; + } + else if(NA == 9000000 && NONZER == 26 && NITER == 100 && SHIFT == 1500.0) + { + class_npb = 'E'; + zeta_verify_value = 77.522164599383; + } + else + { + class_npb = 'U'; + } + + printf("\n\n NAS Parallel Benchmarks 4.1 Parallel C++ version with OpenMP - CG " + "Benchmark\n\n"); + printf(" Size: %11d\n", NA); + printf(" Iterations: %5d\n", NITER); + + naa = NA; + nzz = NZ; + + /* initialize random number generator */ + tran = 314159265.0; + amult = 1220703125.0; + zeta = randlc(&tran, amult); + + makea(naa, nzz, a, colidx, rowstr, firstrow, lastrow, firstcol, lastcol, arow, + (int(*)[NONZER + 1])(void*) acol, (double(*)[NONZER + 1])(void*) aelt, iv); + +/* + * --------------------------------------------------------------------- + * note: as a result of the above call to makea: + * values of j used in indexing rowstr go from 0 --> lastrow-firstrow + * values of colidx which are col indexes go from firstcol --> lastcol + * so: + * shift the col index vals from actual (firstcol --> lastcol) + * to local, i.e., (0 --> lastcol-firstcol) + * --------------------------------------------------------------------- + */ +#pragma omp parallel private(it, i, j, k) + { +#pragma omp for nowait + for(j = 0; j < lastrow - firstrow + 1; j++) + { + for(k = rowstr[j]; k < rowstr[j + 1]; k++) + { + colidx[k] = colidx[k] - firstcol; + } + } + +/* set starting vector to (1, 1, .... 
1) */ +#pragma omp for nowait + for(i = 0; i < NA + 1; i++) + { + x[i] = 1.0; + } +#pragma omp for nowait + for(j = 0; j < lastcol - firstcol + 1; j++) + { + q[j] = 0.0; + z[j] = 0.0; + r[j] = 0.0; + p[j] = 0.0; + } + +#pragma omp single + zeta = 0.0; + + /* + * ------------------------------------------------------------------- + * ----> + * do one iteration untimed to init all code and data page tables + * ----> (then reinit, start timing, to niter its) + * -------------------------------------------------------------------*/ + + for(it = 1; it <= 1; it++) + { + /* the call to the conjugate gradient routine */ + conj_grad(colidx, rowstr, x, z, a, p, q, r, &rnorm); +#pragma omp single + { + norm_temp1 = 0.0; + norm_temp2 = 0.0; + } + +/* + * -------------------------------------------------------------------- + * zeta = shift + 1/(x.z) + * so, first: (x.z) + * also, find norm of z + * so, first: (z.z) + * -------------------------------------------------------------------- + */ +#pragma omp for reduction(+ : norm_temp1, norm_temp2) + for(j = 0; j < lastcol - firstcol + 1; j++) + { + norm_temp1 += x[j] * z[j]; + norm_temp2 += +z[j] * z[j]; + } + +#pragma omp single + norm_temp2 = 1.0 / sqrt(norm_temp2); + +/* normalize z to obtain x */ +#pragma omp for + for(j = 0; j < lastcol - firstcol + 1; j++) + { + x[j] = norm_temp2 * z[j]; + } + + } /* end of do one iteration untimed */ + +/* set starting vector to (1, 1, .... 
1) */ +#pragma omp for + for(i = 0; i < NA + 1; i++) + { + x[i] = 1.0; + } + +#pragma omp single + zeta = 0.0; + +#pragma omp master + { + timer_stop(T_INIT); + + printf(" Initialization time = %15.3f seconds\n", timer_read(T_INIT)); + + timer_start(T_BENCH); + } + + /* + * -------------------------------------------------------------------- + * ----> + * main iteration for inverse power method + * ----> + * -------------------------------------------------------------------- + */ + for(it = 1; it <= NITER; it++) + { +/* the call to the conjugate gradient routine */ +#pragma omp master + if(timeron != 0) + { + timer_start(T_CONJ_GRAD); + } + conj_grad(colidx, rowstr, x, z, a, p, q, r, &rnorm); +#pragma omp master + if(timeron != 0) + { + timer_stop(T_CONJ_GRAD); + } + +#pragma omp single + { + norm_temp1 = 0.0; + norm_temp2 = 0.0; + } + +/* + * -------------------------------------------------------------------- + * zeta = shift + 1/(x.z) + * so, first: (x.z) + * also, find norm of z + * so, first: (z.z) + * -------------------------------------------------------------------- + */ +#pragma omp for reduction(+ : norm_temp1, norm_temp2) + for(j = 0; j < lastcol - firstcol + 1; j++) + { + norm_temp1 += x[j] * z[j]; + norm_temp2 += z[j] * z[j]; + } +#pragma omp single + { + norm_temp2 = 1.0 / sqrt(norm_temp2); + zeta = SHIFT + 1.0 / norm_temp1; + } + +#pragma omp master + { + if(it == 1) + { + printf("\n iteration ||r|| zeta\n"); + } + printf(" %5d %20.14e%20.13e\n", it, rnorm, zeta); + } +/* normalize z to obtain x */ +#pragma omp for + for(j = 0; j < lastcol - firstcol + 1; j++) + { + x[j] = norm_temp2 * z[j]; + } + } /* end of main iter inv pow meth */ + } /* end parallel */ + timer_stop(T_BENCH); + + /* + * -------------------------------------------------------------------- + * end of timed section + * -------------------------------------------------------------------- + */ + + t = timer_read(T_BENCH); + + printf(" Benchmark completed\n"); + + epsilon = 1.0e-10; 
+ if(class_npb != 'U') + { + err = fabs(zeta - zeta_verify_value) / zeta_verify_value; + if(err <= epsilon) + { + verified = TRUE; + printf(" VERIFICATION SUCCESSFUL\n"); + printf(" Zeta is %20.13e\n", zeta); + printf(" Error is %20.13e\n", err); + } + else + { + verified = FALSE; + printf(" VERIFICATION FAILED\n"); + printf(" Zeta %20.13e\n", zeta); + printf(" The correct zeta is %20.13e\n", zeta_verify_value); + } + } + else + { + verified = FALSE; + printf(" Problem size unknown\n"); + printf(" NO VERIFICATION PERFORMED\n"); + } + if(t != 0.0) + { + mflops = (double) (2.0 * NITER * NA) * + (3.0 + (double) (NONZER * (NONZER + 1)) + + 25.0 * (5.0 + (double) (NONZER * (NONZER + 1))) + 3.0) / + t / 1000000.0; + } + else + { + mflops = 0.0; + } + setenv("OMP_NUM_THREADS", "1", 0); + c_print_results((char*) "CG", class_npb, NA, 0, 0, NITER, t, mflops, + (char*) " floating point", verified, (char*) NPBVERSION, + (char*) COMPILETIME, (char*) COMPILERVERSION, (char*) LIBVERSION, + std::getenv("OMP_NUM_THREADS"), (char*) CS1, (char*) CS2, (char*) CS3, + (char*) CS4, (char*) CS5, (char*) CS6, (char*) CS7); + + /* + * --------------------------------------------------------------------- + * more timers + * --------------------------------------------------------------------- + */ + if(timeron != 0) + { + tmax = timer_read(T_BENCH); + if(tmax == 0.0) + { + tmax = 1.0; + } + printf(" SECTION Time (secs)\n"); + for(i = 0; i < T_LAST; i++) + { + t = timer_read(i); + if(i == T_INIT) + { + printf(" %8s:%9.3f\n", t_names[i], t); + } + else + { + printf(" %8s:%9.3f (%6.2f%%)\n", t_names[i], t, t * 100.0 / tmax); + if(i == T_CONJ_GRAD) + { + t = tmax - t; + printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest", t, t * 100.0 / tmax); + } + } + } + } + + return 0; +} + +/* + * --------------------------------------------------------------------- + * floating point arrays here are named as in NPB1 spec discussion of + * CG algorithm + * 
--------------------------------------------------------------------- + */ +static void +conj_grad(const int colidx[], const int rowstr[], const double x[], double z[], + const double a[], double p[], double q[], double r[], double* rnorm) +{ /* Runs cgitmax conjugate-gradient steps starting from z = 0, r = p = x, using the sparse matrix held in (a, colidx, rowstr). On return z holds the final iterate and *rnorm = sqrt(sum over j of (x[j] - (A.z)[j])^2). The omp for / omp single directives below have no enclosing parallel construct inside this function, so they presumably bind to a parallel region in the caller. naa, firstrow, lastrow, firstcol and lastcol are not parameters; they are declared elsewhere in the file. */ + int j, k; + int cgit, cgitmax; + double alpha, beta, suml; + static double d, sum, rho, rho0; /* static storage: one instance of each, used as the targets of the reduction clauses below */ + + cgitmax = 25; /* fixed iteration count; the loop below has no convergence test */ +#pragma omp single nowait + { + rho = 0.0; + sum = 0.0; + } +/* initialize the CG algorithm */ +#pragma omp for + for(j = 0; j < naa + 1; j++) + { + q[j] = 0.0; + z[j] = 0.0; + r[j] = x[j]; + p[j] = r[j]; + } + +/* + * -------------------------------------------------------------------- + * rho = r.r + * now, obtain the norm of r: First, sum squares of r elements locally... + * -------------------------------------------------------------------- + */ +#pragma omp for reduction(+ : rho) + for(j = 0; j < lastcol - firstcol + 1; j++) + { + rho += r[j] * r[j]; + } + + /* the conj grad iteration loop */ + for(cgit = 1; cgit <= cgitmax; cgit++) + { + /* + * --------------------------------------------------------------------- + * q = A.p + * the partition submatrix-vector multiply: use workspace w + * (no w appears in this version: each row's product is accumulated in + * suml and written directly to q[j]) + * --------------------------------------------------------------------- + * + * note: this version of the multiply is actually (slightly: maybe %5) + * faster on the sp2 on 16 nodes than is the unrolled-by-2 version + * below. on the Cray t3d, the reverse is TRUE, i.e., the + * unrolled-by-two version is some 10% faster. + * the unrolled-by-8 version below is significantly faster + * on the Cray t3d - overall speed of code is 1.5 times faster.
+ */ + +#pragma omp single nowait + { + d = 0.0; + /* + * -------------------------------------------------------------------- + * save a temporary of rho + * -------------------------------------------------------------------- + */ + rho0 = rho; + rho = 0.0; + } + +#pragma omp for nowait + for(j = 0; j < lastrow - firstrow + 1; j++) + { + suml = 0.0; + for(k = rowstr[j]; k < rowstr[j + 1]; k++) + { + suml += a[k] * p[colidx[k]]; /* entries rowstr[j] .. rowstr[j + 1] - 1 of a/colidx form row j */ + } + q[j] = suml; + } + + /* + * -------------------------------------------------------------------- + * obtain p.q + * + * NOTE(review): the row loop above is nowait, yet this loop reads q[j]. + * That looks safe only if both loops hand iteration j to the same thread + * (same trip count and same schedule) - confirm. + * -------------------------------------------------------------------- + */ + +#pragma omp for reduction(+ : d) + for(j = 0; j < lastcol - firstcol + 1; j++) + { + d += p[j] * q[j]; + } + + /* + * -------------------------------------------------------------------- + * obtain alpha = rho / (p.q) + * ------------------------------------------------------------------- + */ + alpha = rho0 / d; /* rho0 is the r.r value saved by the single block at the top of this iteration */ + + /* + * --------------------------------------------------------------------- + * obtain z = z + alpha*p + * and r = r - alpha*q + * --------------------------------------------------------------------- + */ + +#pragma omp for reduction(+ : rho) + for(j = 0; j < lastcol - firstcol + 1; j++) + { + z[j] += alpha * p[j]; + r[j] -= alpha * q[j]; + + /* + * --------------------------------------------------------------------- + * rho = r.r + * now, obtain the norm of r: first, sum squares of r elements locally...
+ * --------------------------------------------------------------------- + */ + rho += r[j] * r[j]; + } + + /* + * --------------------------------------------------------------------- + * obtain beta + * --------------------------------------------------------------------- + */ + beta = rho / rho0; /* new r.r divided by the r.r saved at the top of this iteration */ + +/* + * --------------------------------------------------------------------- + * p = r + beta*p + * --------------------------------------------------------------------- + */ +#pragma omp for + for(j = 0; j < lastcol - firstcol + 1; j++) + { + p[j] = r[j] + beta * p[j]; + } + } /* end of do cgit=1, cgitmax */ + +/* + * --------------------------------------------------------------------- + * compute residual norm explicitly: ||r|| = ||x - A.z|| + * first, form A.z + * the partition submatrix-vector multiply + * --------------------------------------------------------------------- + */ +#pragma omp for nowait + for(j = 0; j < lastrow - firstrow + 1; j++) + { + suml = 0.0; + for(k = rowstr[j]; k < rowstr[j + 1]; k++) + { + suml += a[k] * z[colidx[k]]; + } + r[j] = suml; /* r is overwritten: from here on it holds A.z, not the CG residual */ + } + +/* + * --------------------------------------------------------------------- + * at this point, r contains A.z + * --------------------------------------------------------------------- + */ +#pragma omp for reduction(+ : sum) + for(j = 0; j < lastcol - firstcol + 1; j++) + { + suml = x[j] - r[j]; /* suml is reused as the j-th component of x - A.z */ + sum += suml * suml; + } +#pragma omp single + *rnorm = sqrt(sum); +} + +/* + * --------------------------------------------------------------------- + * scale a double precision number x in (0,1) by a power of 2 and chop it + * + * returns (int) (ipwr2 * x): the product is formed in double and the + * conversion to int discards the fractional part + * --------------------------------------------------------------------- + */ +static int +icnvrt(double x, int ipwr2) +{ + return (int) (ipwr2 * x); +} + +/* + * --------------------------------------------------------------------- + * generate the test problem for benchmark 6 + * makea generates a sparse matrix with a + * prescribed sparsity distribution + * + * parameter type
usage + * + * input + * + * n i number of cols/rows of matrix + * nz i nonzeros as declared array size + * rcond r*8 condition number + * shift r*8 main diagonal shift + * + * output + * + * a r*8 array for nonzeros + * colidx i col indices + * rowstr i row pointers + * + * workspace + * + * iv, arow, acol i + * aelt r*8 + * --------------------------------------------------------------------- + */ +static void +makea(int n, int nz, double a[], int colidx[], int rowstr[], int firstrow, int lastrow, + int firstcol, int lastcol, int arow[], int acol[][NONZER + 1], + double aelt[][NONZER + 1], int iv[]) +{ /* Fills arow/acol/aelt with n generated sparse vectors (one per iouter), then calls sparse() to assemble them into a, colidx and rowstr. firstcol and lastcol are accepted but unused; rcond and shift are not parameters here - the RCOND and SHIFT macros are passed to sparse(). */ + (void) firstcol; + (void) lastcol; + + int iouter, ivelt, nzv, nn1; + int ivc[NONZER + 1]; + double vc[NONZER + 1]; + + /* + * -------------------------------------------------------------------- + * nonzer is approximately (int(sqrt(nnza /n))); + * -------------------------------------------------------------------- + * nn1 is the smallest power of two not less than n + * (the do-while doubles at least once, so nn1 is never below 2) + * -------------------------------------------------------------------- + */ + nn1 = 1; + do + { + nn1 = 2 * nn1; + } while(nn1 < n); + + /* + * ------------------------------------------------------------------- + * generate nonzero positions and save for the use in sparse + * ------------------------------------------------------------------- + */ + for(iouter = 0; iouter < n; iouter++) + { + nzv = NONZER; + sprnvc(n, nzv, nn1, vc, ivc); + vecset(n, vc, ivc, &nzv, iouter + 1, 0.5); /* sets position iouter + 1 to 0.5; vecset appends (nzv becomes NONZER + 1) when that position was not generated */ + arow[iouter] = nzv; + for(ivelt = 0; ivelt < nzv; ivelt++) + { + acol[iouter][ivelt] = ivc[ivelt] - 1; /* ivc positions start at 1; stored here starting at 0 */ + aelt[iouter][ivelt] = vc[ivelt]; + } + } + + /* + * --------------------------------------------------------------------- + * ...
make the sparse matrix from list of elements with duplicates + * (iv is used as workspace) + * --------------------------------------------------------------------- + */ + sparse(a, colidx, rowstr, n, nz, NONZER, arow, acol, aelt, firstrow, lastrow, iv, + RCOND, SHIFT); +} + +/* + * --------------------------------------------------------------------- + * rows range from firstrow to lastrow + * the rowstr pointers are defined for nrows = lastrow-firstrow+1 values + * + * a, colidx, rowstr are written; arow/acol/aelt are the generated + * vectors produced by makea. nozer is unused. nzloc is caller-provided + * workspace that ends up holding running per-row duplicate counts. + * calls exit(EXIT_FAILURE) if rowstr[nrows] - 1 exceeds nz, or if an + * element finds no slot in its row. + * --------------------------------------------------------------------- + */ +static void +sparse(double a[], int colidx[], int rowstr[], int n, int nz, int nozer, const int arow[], + int acol[][NONZER + 1], double aelt[][NONZER + 1], int firstrow, int lastrow, + int nzloc[], double rcond, double shift) +{ + (void) nozer; + int nrows; + + /* + * --------------------------------------------------- + * generate a sparse matrix from a list of + * [col, row, element] triples + * --------------------------------------------------- + */ + int i, j, j1, j2, nza, k, kk, nzrow, jcol; + double size, scale, ratio, va; + boolean goto_40; + + /* + * -------------------------------------------------------------------- + * how many rows of result + * -------------------------------------------------------------------- + */ + nrows = lastrow - firstrow + 1; + + /* + * -------------------------------------------------------------------- + * ...count the number of triples in each row + * (each vector i contributes arow[i] slots to every row it touches, + * stored at index row + 1 so the prefix sum below yields row starts) + * -------------------------------------------------------------------- + */ + for(j = 0; j < nrows + 1; j++) + { + rowstr[j] = 0; + } + for(i = 0; i < n; i++) + { + for(nza = 0; nza < arow[i]; nza++) + { + j = acol[i][nza] + 1; + rowstr[j] = rowstr[j] + arow[i]; + } + } + rowstr[0] = 0; + for(j = 1; j < nrows + 1; j++) + { + rowstr[j] = rowstr[j] + rowstr[j - 1]; + } + nza = rowstr[nrows] - 1; + + /* + * --------------------------------------------------------------------- + * ...
rowstr(j) now is the location of the first nonzero + * of row j of a + * --------------------------------------------------------------------- + */ + if(nza > nz) + { + printf("Space for matrix elements exceeded in sparse\n"); + printf("nza, nzmax = %d, %d\n", nza, nz); + exit(EXIT_FAILURE); + } + + /* + * --------------------------------------------------------------------- + * ... preload data pages + * --------------------------------------------------------------------- + */ + for(j = 0; j < nrows; j++) + { + for(k = rowstr[j]; k < rowstr[j + 1]; k++) + { + a[k] = 0.0; + colidx[k] = -1; /* -1 marks a slot that holds no column yet (tested below) */ + } + nzloc[j] = 0; + } + + /* + * --------------------------------------------------------------------- + * ... generate actual values by summing duplicates + * --------------------------------------------------------------------- + */ + size = 1.0; + ratio = pow(rcond, (1.0 / (double) (n))); /* size is multiplied by ratio once per i, so it runs from 1 toward rcond */ + for(i = 0; i < n; i++) + { + for(nza = 0; nza < arow[i]; nza++) + { + j = acol[i][nza]; + + scale = size * aelt[i][nza]; + for(nzrow = 0; nzrow < arow[i]; nzrow++) + { + jcol = acol[i][nzrow]; + va = aelt[i][nzrow] * scale; + + /* + * -------------------------------------------------------------------- + * ... add the identity * rcond to the generated matrix to bound + * the smallest eigenvalue from below by rcond + * -------------------------------------------------------------------- + */ + if(jcol == j && j == i) + { + va = va + rcond - shift; + } + + goto_40 = FALSE; + for(k = rowstr[j]; k < rowstr[j + 1]; k++) + { + if(colidx[k] > jcol) + { + /* + * ---------------------------------------------------------------- + * ...
insert colidx here orderly + * (used entries from k upward are shifted one slot to the right) + * ---------------------------------------------------------------- + */ + for(kk = rowstr[j + 1] - 2; kk >= k; kk--) + { + if(colidx[kk] > -1) + { + a[kk + 1] = a[kk]; + colidx[kk + 1] = colidx[kk]; + } + } + colidx[k] = jcol; + a[k] = 0.0; + goto_40 = TRUE; + break; + } + else if(colidx[k] == -1) + { + colidx[k] = jcol; + goto_40 = TRUE; + break; + } + else if(colidx[k] == jcol) + { + /* + * -------------------------------------------------------------- + * ... mark the duplicated entry + * ------------------------------------------------------------- + */ + nzloc[j] = nzloc[j] + 1; + goto_40 = TRUE; + break; + } + } + if(goto_40 == FALSE) + { + printf("internal error in sparse: i=%d\n", i); + exit(EXIT_FAILURE); + } + a[k] = a[k] + va; /* k is the slot found or created by the search loop above */ + } + } + size = size * ratio; + } + + /* + * --------------------------------------------------------------------- + * ... remove empty entries and generate final results + * (nzloc becomes a running total, then each row is copied down by the + * total accumulated before it) + * --------------------------------------------------------------------- + */ + for(j = 1; j < nrows; j++) + { + nzloc[j] = nzloc[j] + nzloc[j - 1]; + } + + for(j = 0; j < nrows; j++) + { + if(j > 0) + { + j1 = rowstr[j] - nzloc[j - 1]; + } + else + { + j1 = 0; + } + j2 = rowstr[j + 1] - nzloc[j]; + nza = rowstr[j]; + for(k = j1; k < j2; k++) + { + a[k] = a[nza]; + colidx[k] = colidx[nza]; + nza = nza + 1; + } + } + for(j = 1; j < nrows + 1; j++) + { + rowstr[j] = rowstr[j] - nzloc[j - 1]; + } + nza = rowstr[nrows] - 1; /* not read again before the function returns */ +} + +/* + * --------------------------------------------------------------------- + * generate a sparse n-vector (v, iv) + * having nzv nonzeros + * + * mark(i) is set to 1 if position i is nonzero. + * mark is all zero on entry and is reset to all zero before exit + * this corrects a performance bug found by John G.
Lewis, caused by + * reinitialization of mark on every one of the n calls to sprnvc + * --------------------------------------------------------------------- + */ +static void +sprnvc(int n, int nz, int nn1, double v[], int iv[]) +{ /* Fills v[0 .. nz - 1] with generated values and iv[0 .. nz - 1] with distinct positions, none greater than n. Every attempt consumes two randlc draws (value first, then position), including attempts rejected because i > n or because the position was already produced. No mark array appears in this version: repeats are detected by scanning iv[0 .. nzv - 1]. tran and amult are not parameters; they are declared elsewhere in the file. */ + int nzv, ii, i; + double vecelt, vecloc; + + nzv = 0; /* number of entries accepted so far */ + + while(nzv < nz) + { + vecelt = randlc(&tran, amult); + + /* + * -------------------------------------------------------------------- + * generate an integer between 1 and n in a portable manner + * (icnvrt scales by nn1, so candidates above n are possible and are + * skipped) + * -------------------------------------------------------------------- + */ + vecloc = randlc(&tran, amult); + i = icnvrt(vecloc, nn1) + 1; + if(i > n) + { + continue; + } + + /* + * -------------------------------------------------------------------- + * was this integer generated already? + * -------------------------------------------------------------------- + */ + boolean was_gen = FALSE; + for(ii = 0; ii < nzv; ii++) + { + if(iv[ii] == i) + { + was_gen = TRUE; + break; + } + } + if(was_gen != 0) + { + continue; + } + v[nzv] = vecelt; + iv[nzv] = i; + nzv = nzv + 1; + } +} + +/* + * -------------------------------------------------------------------- + * set ith element of sparse vector (v, iv) with + * nzv nonzeros to val + * + * every k with iv[k] == i has v[k] overwritten with val; if there is no + * such k, (val, i) is appended at index *nzv and *nzv is incremented, so + * v and iv need room for one more entry. n is unused. + * -------------------------------------------------------------------- + */ +static void +vecset(int n, double v[], int iv[], int* nzv, int i, double val) +{ + (void) n; + int k; + boolean set; + + set = FALSE; + for(k = 0; k < *nzv; k++) + { + if(iv[k] == i) + { + v[k] = val; + set = TRUE; + } + } + if(set == FALSE) + { + v[*nzv] = val; + iv[*nzv] = i; + *nzv = *nzv + 1; + } +} diff --git a/projects/rocprofiler-systems/examples/openmp/CG/npbparams.hpp b/projects/rocprofiler-systems/examples/openmp/CG/npbparams.hpp new file mode 100644 index 0000000000..472c52c2aa --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/CG/npbparams.hpp @@ -0,0 +1,23 @@ +/* CLASS = B */ +/* + c This file is generated automatically by the setparams utility.
+ c It sets the number of processors and the class_npb of the NPB + c in this directory. Do not modify it by hand. + */ +#define NA 50000 +#define NONZER 13 +#define NITER 50 +#define SHIFT 60.0 +#define RCOND 1.0e-1 +#define CONVERTDOUBLE FALSE +#define COMPILETIME "01 Mar 2022" +#define NPBVERSION "4.1" +#define LIBVERSION "201511" +#define COMPILERVERSION "11.1.0" +#define CS1 "g++ -std=c++14" +#define CS2 "$(CC)" +#define CS3 "-lm" +#define CS4 "-I../common " +#define CS5 "-O3 -fopenmp -mcmodel=medium" +#define CS6 "-O3 -fopenmp -mcmodel=medium" +#define CS7 "randdp" diff --git a/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt b/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt new file mode 100644 index 0000000000..1354f832af --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt @@ -0,0 +1,99 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-openmp LANGUAGES CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +file(GLOB common_source ${CMAKE_CURRENT_SOURCE_DIR}/common/*.cpp) +add_library(openmp-common OBJECT ${common_source}) #[[ OBJECT library: the shared sources are compiled once and their object files are added to both executables below ]] +target_include_directories(openmp-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/common) + +add_executable( + openmp-cg + ${CMAKE_CURRENT_SOURCE_DIR}/CG/cg.cpp + $<TARGET_OBJECTS:openmp-common> +) +add_executable( + openmp-lu + ${CMAKE_CURRENT_SOURCE_DIR}/LU/lu.cpp + $<TARGET_OBJECTS:openmp-common> +) + +option(USE_CLANG_OMP "Use the clang OpenMP if available" ON) +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + find_package(OpenMP REQUIRED) + target_link_libraries(openmp-common PUBLIC OpenMP::OpenMP_CXX) + set(ROCPROFSYS_OPENMP_USING_LIBOMP_LIBRARY + ON + CACHE INTERNAL + "Used by rocprofiler-systems testing" + FORCE + ) +else() + find_program(CLANGXX_EXECUTABLE NAMES clang++) + find_library( + LIBOMP_LIBRARY + NAMES omp omp5
${CMAKE_SHARED_LIBRARY_PREFIX}omp${CMAKE_SHARED_LIBRARY_SUFFIX}.5 + ) + if( + CLANGXX_EXECUTABLE + AND LIBOMP_LIBRARY + AND COMMAND rocprofiler_systems_custom_compilation + AND USE_CLANG_OMP + ) + target_compile_options(openmp-common PUBLIC -W -Wall -fopenmp=libomp) + target_link_libraries(openmp-common PUBLIC ${LIBOMP_LIBRARY}) + rocprofiler_systems_custom_compilation(COMPILER ${CLANGXX_EXECUTABLE} + TARGET openmp-common + ) + rocprofiler_systems_custom_compilation(COMPILER ${CLANGXX_EXECUTABLE} + TARGET openmp-cg + ) + rocprofiler_systems_custom_compilation(COMPILER ${CLANGXX_EXECUTABLE} + TARGET openmp-lu + ) + set(ROCPROFSYS_OPENMP_USING_LIBOMP_LIBRARY + ON + CACHE INTERNAL + "Used by rocprofiler-systems testing" + FORCE + ) + else() + find_package(OpenMP REQUIRED) + target_link_libraries(openmp-common PUBLIC OpenMP::OpenMP_CXX) + set(ROCPROFSYS_OPENMP_USING_LIBOMP_LIBRARY + OFF + CACHE INTERNAL + "Used by rocprofiler-systems testing" + FORCE + ) + endif() +endif() + +target_link_libraries(openmp-cg PRIVATE openmp-common) +target_link_libraries(openmp-lu PRIVATE openmp-common) + +if(ROCPROFSYS_INSTALL_EXAMPLES) + install( + TARGETS openmp-cg openmp-lu + DESTINATION bin + COMPONENT rocprofiler-systems-examples + ) +endif() + +if(ROCPROFSYS_DISABLE_EXAMPLES) + if(NOT "openmp-target" IN_LIST ROCPROFSYS_DISABLE_EXAMPLES) + add_subdirectory(target) + endif() +else() + add_subdirectory(target) +endif() diff --git a/projects/rocprofiler-systems/examples/openmp/LU/lu.cpp b/projects/rocprofiler-systems/examples/openmp/LU/lu.cpp new file mode 100644 index 0000000000..d905767a33 --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/LU/lu.cpp @@ -0,0 +1,3563 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av.
Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +Authors of the Fortran code: + S. Weeratunga + V. Venkatakrishnan + E. Barszcz + M. Yarrow + H. 
Jin + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff + +------------------------------------------------------------------------------ + +The OpenMP version is a parallel implementation of the serial C++ version +OpenMP version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-OMP + +Authors of the OpenMP code: + Júnior Löff + +*/ + +#include "../common/npb-CPP.hpp" +#include "npbparams.hpp" + +/* + * --------------------------------------------------------------------- + * driver for the performance evaluation of the solver for + * five coupled parabolic/elliptic partial differential equations + * --------------------------------------------------------------------- + * parameters which can be overridden in runtime config file + * isiz1,isiz2,isiz3 give the maximum size + * ipr = 1 to print out verbose information + * omega = 2.0 is correct for all classes + * tolrsd is tolerance levels for steady state residuals + * --------------------------------------------------------------------- + * field variables and residuals + * to improve cache performance, second two dimensions padded by 1 + * for even number sizes only. 
+ * note: corresponding array (called "v") in routines blts, buts, + * and l2norm are similarly padded + * --------------------------------------------------------------------- + */ +#define IPR_DEFAULT 1 +#define OMEGA_DEFAULT 1.2 +#define TOLRSD1_DEF 1.0e-08 +#define TOLRSD2_DEF 1.0e-08 +#define TOLRSD3_DEF 1.0e-08 +#define TOLRSD4_DEF 1.0e-08 +#define TOLRSD5_DEF 1.0e-08 +#define C1 1.40e+00 +#define C2 0.40e+00 +#define C3 1.00e-01 +#define C4 1.00e+00 +#define C5 1.40e+00 +#define T_TOTAL 1 +#define T_RHSX 2 +#define T_RHSY 3 +#define T_RHSZ 4 +#define T_RHS 5 +#define T_JACLD 6 +#define T_BLTS 7 +#define T_JACU 8 +#define T_BUTS 9 +#define T_ADD 10 +#define T_L2NORM 11 +#define T_LAST 11 + +/* global variables */ +#if defined(DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION) +static double u[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]; +static double rsd[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]; +static double frct[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]; +static double flux[ISIZ1][5]; +static double qs[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]; +static double rho_i[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]; +static double a[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; +static double b[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; +static double c[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; +static double d[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; +static double ce[13][5]; +#else +static double (*u)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5] = + (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]) + malloc(sizeof(double) * + ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1) * (5))); +static double (*rsd)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5] = + (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]) + malloc(sizeof(double) * + ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1) * (5))); +static double (*frct)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5] = + (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]) + malloc(sizeof(double) * + ((ISIZ3) * (ISIZ2 / 2 
* 2 + 1) * (ISIZ1 / 2 * 2 + 1) * (5))); +static double (*flux)[5] = (double (*)[5]) malloc(sizeof(double) * ((ISIZ1) * (5))); +static double (*qs)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1] = + (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]) + malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1))); +static double (*rho_i)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1] = + (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]) + malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1))); +static double (*a)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) + malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); +static double (*b)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) + malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); +static double (*c)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) + malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); +static double (*d)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) + malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); +static double (*ce)[5] = (double (*)[5]) malloc(sizeof(double) * ((13) * (5))); +#endif +/* grid */ +static double dxi, deta, dzeta; +static double tx1, tx2, tx3; +static double ty1, ty2, ty3; +static double tz1, tz2, tz3; +static int nx, ny, nz; +static int nx0, ny0, nz0; +static int ist, iend; +static int jst, jend; +static int ii1, ii2; +static int ji1, ji2; +static int ki1, ki2; +/* dissipation */ +static double dx1, dx2, dx3, dx4, dx5; +static double dy1, dy2, dy3, dy4, dy5; +static double dz1, dz2, dz3, dz4, dz5; +static double dssp; +/* output control parameters */ +static int ipr, inorm; +/* newton-raphson iteration control parameters */ +static double dt, omega, tolrsd[5], rsdnm[5], errnm[5], frc; +static int itmax; +/* timer */ +static double maxtime; +static boolean timeron; + +/* function prototypes */ +void +blts(int nx, int 
ny, int nz, int k, double omega, + double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], + double ldz[][ISIZ1 / 2 * 2 + 1][5][5], double ldy[][ISIZ1 / 2 * 2 + 1][5][5], + double ldx[][ISIZ1 / 2 * 2 + 1][5][5], double d[][ISIZ1 / 2 * 2 + 1][5][5], int ist, + int iend, int jst, int jend, int nx0, int ny0); +void +buts(int nx, int ny, int nz, int k, double omega, + double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], void* pointer_tv, + double d[][ISIZ1 / 2 * 2 + 1][5][5], double udx[][ISIZ1 / 2 * 2 + 1][5][5], + double udy[][ISIZ1 / 2 * 2 + 1][5][5], double udz[][ISIZ1 / 2 * 2 + 1][5][5], + int ist, int iend, int jst, int jend, int nx0, int ny0); +void +domain(); +void +erhs(); +void +error(); +void +exact(int i, int j, int k, double u000ijk[]); +void +jacld(int k); +void +jacu(int k); +void +l2norm(int nx0, int ny0, int nz0, int ist, int iend, int jst, int jend, + double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], double sum[5]); +void +pintgr(); +void +read_input(); +void +rhs(); +void +setbv(); +void +setcoeff(); +void +setiv(); +void +ssor(int niter); +void +verify(double xcr[], double xce[], double xci, char* class_npb, boolean* verified); + +static boolean flag[ISIZ1 / 2 * 2 + 1]; +static boolean flag2[ISIZ1 / 2 * 2 + 1]; + +/* lu */ +int +main(int, char*[]) +{ +#if defined(DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION) + printf( + " DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION mode on\n"); +#endif + char class_npb; + boolean verified; + double mflops; + double t, tmax, trecs[T_LAST + 1]; + int i; + char* t_names[T_LAST + 1]; + /* + * --------------------------------------------------------------------- + * setup info for timers + * --------------------------------------------------------------------- + */ + FILE* fp; + if((fp = fopen("timer.flag", "r")) != NULL) + { + timeron = TRUE; + t_names[T_TOTAL] = (char*) "total"; + t_names[T_RHSX] = (char*) "rhsx"; + t_names[T_RHSY] = (char*) "rhsy"; + t_names[T_RHSZ] = 
(char*) "rhsz"; + t_names[T_RHS] = (char*) "rhs"; + t_names[T_JACLD] = (char*) "jacld"; + t_names[T_BLTS] = (char*) "blts"; + t_names[T_JACU] = (char*) "jacu"; + t_names[T_BUTS] = (char*) "buts"; + t_names[T_ADD] = (char*) "add"; + t_names[T_L2NORM] = (char*) "l2norm"; + fclose(fp); + } + else + { + timeron = FALSE; + } + /* + * --------------------------------------------------------------------- + * read input data + * --------------------------------------------------------------------- + */ + read_input(); + /* + * --------------------------------------------------------------------- + * set up domain sizes + * --------------------------------------------------------------------- + */ + domain(); + /* + * --------------------------------------------------------------------- + * set up coefficients + * --------------------------------------------------------------------- + */ + setcoeff(); + +#pragma omp parallel + { + /* + * --------------------------------------------------------------------- + * set the boundary values for dependent variables + * --------------------------------------------------------------------- + */ + setbv(); + /* + * --------------------------------------------------------------------- + * set the initial values for dependent variables + * --------------------------------------------------------------------- + */ + setiv(); + /* + * --------------------------------------------------------------------- + * compute the forcing term based on prescribed exact solution + * --------------------------------------------------------------------- + */ + erhs(); + } /* end parallel */ + + /* + * --------------------------------------------------------------------- + * perform one SSOR iteration to touch all pages + * --------------------------------------------------------------------- + */ + ssor(1); +#pragma omp parallel + { + /* + * --------------------------------------------------------------------- + * reset the boundary and initial values + 
* --------------------------------------------------------------------- + */ + setbv(); + setiv(); + } + + /* + * --------------------------------------------------------------------- + * perform the SSOR iterations + * --------------------------------------------------------------------- + */ + ssor(itmax); + /* + * --------------------------------------------------------------------- + * compute the solution error + * --------------------------------------------------------------------- + */ + error(); + /* + * --------------------------------------------------------------------- + * compute the surface integral + * --------------------------------------------------------------------- + */ + pintgr(); + /* + * --------------------------------------------------------------------- + * verification test + * --------------------------------------------------------------------- + */ + verify(rsdnm, errnm, frc, &class_npb, &verified); + mflops = (double) itmax * + (1984.77 * (double) nx0 * (double) ny0 * (double) nz0 - + 10923.3 * pow(((double) (nx0 + ny0 + nz0) / 3.0), 2.0) + + 27770.9 * (double) (nx0 + ny0 + nz0) / 3.0 - 144010.0) / + (maxtime * 1000000.0); + setenv("OMP_NUM_THREADS", "1", 0); + c_print_results((char*) "LU", class_npb, nx0, ny0, nz0, itmax, maxtime, mflops, + (char*) " floating point", verified, (char*) NPBVERSION, + (char*) COMPILETIME, (char*) COMPILERVERSION, (char*) LIBVERSION, + std::getenv("OMP_NUM_THREADS"), (char*) CS1, (char*) CS2, (char*) CS3, + (char*) CS4, (char*) CS5, (char*) CS6, (char*) "(none)"); + /* + * --------------------------------------------------------------------- + * more timers + * --------------------------------------------------------------------- + */ + if(timeron) + { + for(i = 1; i <= T_LAST; i++) + { + trecs[i] = timer_read(i); + } + tmax = maxtime; + if(tmax == 0.0) + { + tmax = 1.0; + } + printf(" SECTION Time (secs)\n"); + for(i = 1; i <= T_LAST; i++) + { + printf(" %-8s:%9.3f (%6.2f%%)\n", t_names[i], trecs[i], 
+ trecs[i] * 100. / tmax); + if(i == T_RHS) + { + t = trecs[T_RHSX] + trecs[T_RHSY] + trecs[T_RHSZ]; + printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t * 100. / tmax); + t = trecs[i] - t; + printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t * 100. / tmax); + } + } + } + return 0; +} + +/* + * --------------------------------------------------------------------- + * compute the regular-sparse, block lower triangular solution: + * v <-- ( L-inv ) * v + * --------------------------------------------------------------------- + * to improve cache performance, second two dimensions padded by 1 + * for even number sizes only. only needed in v. + * --------------------------------------------------------------------- + */ +void +blts(int /*nx*/, int /*ny*/, int /*nz*/, int k, double omega, + double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], + double ldz[][ISIZ1 / 2 * 2 + 1][5][5], double ldy[][ISIZ1 / 2 * 2 + 1][5][5], + double ldx[][ISIZ1 / 2 * 2 + 1][5][5], double d[][ISIZ1 / 2 * 2 + 1][5][5], int ist, + int iend, int jst, int jend, int /*nx0*/, int /*ny0*/) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, m; + double tmp, tmp1; + double tmat[5][5], tv[5]; + +#pragma omp for nowait schedule(static) + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + v[k][j][i][m] = + v[k][j][i][m] - omega * (ldz[j][i][0][m] * v[k - 1][j][i][0] + + ldz[j][i][1][m] * v[k - 1][j][i][1] + + ldz[j][i][2][m] * v[k - 1][j][i][2] + + ldz[j][i][3][m] * v[k - 1][j][i][3] + + ldz[j][i][4][m] * v[k - 1][j][i][4]); + } + } + } + +#pragma omp for nowait schedule(static) + for(j = jst; j < jend; j++) + { + if(j != jst) + { + while(flag[j - 1] == 0) + { +#pragma omp flush + ; + } + } + if(j != jend - 1) + { + while(flag[j] == 1) + { +#pragma omp flush + ; + } + } + + for(i = ist; i < iend; i++) + { + for(m 
= 0; m < 5; m++) + { + tv[m] = v[k][j][i][m] - omega * (ldy[j][i][0][m] * v[k][j - 1][i][0] + + ldx[j][i][0][m] * v[k][j][i - 1][0] + + ldy[j][i][1][m] * v[k][j - 1][i][1] + + ldx[j][i][1][m] * v[k][j][i - 1][1] + + ldy[j][i][2][m] * v[k][j - 1][i][2] + + ldx[j][i][2][m] * v[k][j][i - 1][2] + + ldy[j][i][3][m] * v[k][j - 1][i][3] + + ldx[j][i][3][m] * v[k][j][i - 1][3] + + ldy[j][i][4][m] * v[k][j - 1][i][4] + + ldx[j][i][4][m] * v[k][j][i - 1][4]); + } + /* + * --------------------------------------------------------------------- + * diagonal block inversion + * + * forward elimination + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + tmat[0][m] = d[j][i][0][m]; + tmat[1][m] = d[j][i][1][m]; + tmat[2][m] = d[j][i][2][m]; + tmat[3][m] = d[j][i][3][m]; + tmat[4][m] = d[j][i][4][m]; + } + /* */ + tmp1 = 1.0 / tmat[0][0]; + tmp = tmp1 * tmat[0][1]; + tmat[1][1] = tmat[1][1] - tmp * tmat[1][0]; + tmat[2][1] = tmat[2][1] - tmp * tmat[2][0]; + tmat[3][1] = tmat[3][1] - tmp * tmat[3][0]; + tmat[4][1] = tmat[4][1] - tmp * tmat[4][0]; + tv[1] = tv[1] - tv[0] * tmp; + /* */ + tmp = tmp1 * tmat[0][2]; + tmat[1][2] = tmat[1][2] - tmp * tmat[1][0]; + tmat[2][2] = tmat[2][2] - tmp * tmat[2][0]; + tmat[3][2] = tmat[3][2] - tmp * tmat[3][0]; + tmat[4][2] = tmat[4][2] - tmp * tmat[4][0]; + tv[2] = tv[2] - tv[0] * tmp; + /* */ + tmp = tmp1 * tmat[0][3]; + tmat[1][3] = tmat[1][3] - tmp * tmat[1][0]; + tmat[2][3] = tmat[2][3] - tmp * tmat[2][0]; + tmat[3][3] = tmat[3][3] - tmp * tmat[3][0]; + tmat[4][3] = tmat[4][3] - tmp * tmat[4][0]; + tv[3] = tv[3] - tv[0] * tmp; + /* */ + tmp = tmp1 * tmat[0][4]; + tmat[1][4] = tmat[1][4] - tmp * tmat[1][0]; + tmat[2][4] = tmat[2][4] - tmp * tmat[2][0]; + tmat[3][4] = tmat[3][4] - tmp * tmat[3][0]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][0]; + tv[4] = tv[4] - tv[0] * tmp; + /* */ + tmp1 = 1.0 / tmat[1][1]; + tmp = tmp1 * tmat[1][2]; + tmat[2][2] = tmat[2][2] - tmp * tmat[2][1]; + 
tmat[3][2] = tmat[3][2] - tmp * tmat[3][1]; + tmat[4][2] = tmat[4][2] - tmp * tmat[4][1]; + tv[2] = tv[2] - tv[1] * tmp; + /* */ + tmp = tmp1 * tmat[1][3]; + tmat[2][3] = tmat[2][3] - tmp * tmat[2][1]; + tmat[3][3] = tmat[3][3] - tmp * tmat[3][1]; + tmat[4][3] = tmat[4][3] - tmp * tmat[4][1]; + tv[3] = tv[3] - tv[1] * tmp; + /* */ + tmp = tmp1 * tmat[1][4]; + tmat[2][4] = tmat[2][4] - tmp * tmat[2][1]; + tmat[3][4] = tmat[3][4] - tmp * tmat[3][1]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][1]; + tv[4] = tv[4] - tv[1] * tmp; + /* */ + tmp1 = 1.0 / tmat[2][2]; + tmp = tmp1 * tmat[2][3]; + tmat[3][3] = tmat[3][3] - tmp * tmat[3][2]; + tmat[4][3] = tmat[4][3] - tmp * tmat[4][2]; + tv[3] = tv[3] - tv[2] * tmp; + /* */ + tmp = tmp1 * tmat[2][4]; + tmat[3][4] = tmat[3][4] - tmp * tmat[3][2]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][2]; + tv[4] = tv[4] - tv[2] * tmp; + /* */ + tmp1 = 1.0 / tmat[3][3]; + tmp = tmp1 * tmat[3][4]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][3]; + tv[4] = tv[4] - tv[3] * tmp; + /* + * --------------------------------------------------------------------- + * back substitution + * --------------------------------------------------------------------- + */ + v[k][j][i][4] = tv[4] / tmat[4][4]; + tv[3] = tv[3] - tmat[4][3] * v[k][j][i][4]; + v[k][j][i][3] = tv[3] / tmat[3][3]; + tv[2] = tv[2] - tmat[3][2] * v[k][j][i][3] - tmat[4][2] * v[k][j][i][4]; + v[k][j][i][2] = tv[2] / tmat[2][2]; + tv[1] = tv[1] - tmat[2][1] * v[k][j][i][2] - tmat[3][1] * v[k][j][i][3] - + tmat[4][1] * v[k][j][i][4]; + v[k][j][i][1] = tv[1] / tmat[1][1]; + tv[0] = tv[0] - tmat[1][0] * v[k][j][i][1] - tmat[2][0] * v[k][j][i][2] - + tmat[3][0] * v[k][j][i][3] - tmat[4][0] * v[k][j][i][4]; + v[k][j][i][0] = tv[0] / tmat[0][0]; + } + + if(j != jend - 1) flag[j] = 1; + if(j != jst) flag[j - 1] = 0; + } +} + +/* + * --------------------------------------------------------------------- + * compute the regular-sparse, block upper triangular solution: + * v <-- ( U-inv ) * v + * 
--------------------------------------------------------------------- + * to improve cache performance, second two dimensions padded by 1 + * for even number sizes only. only needed in v. + * --------------------------------------------------------------------- + */ +void +buts(int /*nx*/, int /*ny*/, int /*nz*/, int k, double omega, + double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], void* pointer_tv, + double d[][ISIZ1 / 2 * 2 + 1][5][5], double udx[][ISIZ1 / 2 * 2 + 1][5][5], + double udy[][ISIZ1 / 2 * 2 + 1][5][5], double udz[][ISIZ1 / 2 * 2 + 1][5][5], + int ist, int iend, int jst, int jend, int /*nx0*/, int /*ny0*/) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + double(*tv)[ISIZ1 / 2 * 2 + 1][5] = (double(*)[ISIZ1 / 2 * 2 + 1][5]) pointer_tv; + int i, j, m; + double tmp, tmp1; + double tmat[5][5]; + +#pragma omp for nowait schedule(static) + for(j = jend - 1; j >= jst; j--) + { + for(i = iend - 1; i >= ist; i--) + { + for(m = 0; m < 5; m++) + { + tv[j][i][m] = omega * (udz[j][i][0][m] * v[k + 1][j][i][0] + + udz[j][i][1][m] * v[k + 1][j][i][1] + + udz[j][i][2][m] * v[k + 1][j][i][2] + + udz[j][i][3][m] * v[k + 1][j][i][3] + + udz[j][i][4][m] * v[k + 1][j][i][4]); + } + } + } + +#pragma omp for nowait schedule(static) + for(j = jend - 1; j >= jst; j--) + { + if(j != jend - 1) + { + while(flag2[j + 1] == 0) + { +#pragma omp flush + ; + } + } + if(j != jst) + { + while(flag2[j] == 1) + { +#pragma omp flush + ; + } + } + + for(i = iend - 1; i >= ist; i--) + { + for(m = 0; m < 5; m++) + { + tv[j][i][m] = tv[j][i][m] + omega * (udy[j][i][0][m] * v[k][j + 1][i][0] + + udx[j][i][0][m] * v[k][j][i + 1][0] + + udy[j][i][1][m] * v[k][j + 1][i][1] + + udx[j][i][1][m] * v[k][j][i + 1][1] + + udy[j][i][2][m] * v[k][j + 1][i][2] + + udx[j][i][2][m] * v[k][j][i + 1][2] + + udy[j][i][3][m] * v[k][j + 1][i][3] + + udx[j][i][3][m] * 
v[k][j][i + 1][3] + + udy[j][i][4][m] * v[k][j + 1][i][4] + + udx[j][i][4][m] * v[k][j][i + 1][4]); + } + /* + * --------------------------------------------------------------------- + * diagonal block inversion + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + tmat[0][m] = d[j][i][0][m]; + tmat[1][m] = d[j][i][1][m]; + tmat[2][m] = d[j][i][2][m]; + tmat[3][m] = d[j][i][3][m]; + tmat[4][m] = d[j][i][4][m]; + } + /* */ + tmp1 = 1.0 / tmat[0][0]; + tmp = tmp1 * tmat[0][1]; + tmat[1][1] = tmat[1][1] - tmp * tmat[1][0]; + tmat[2][1] = tmat[2][1] - tmp * tmat[2][0]; + tmat[3][1] = tmat[3][1] - tmp * tmat[3][0]; + tmat[4][1] = tmat[4][1] - tmp * tmat[4][0]; + tv[j][i][1] = tv[j][i][1] - tv[j][i][0] * tmp; + /* */ + tmp = tmp1 * tmat[0][2]; + tmat[1][2] = tmat[1][2] - tmp * tmat[1][0]; + tmat[2][2] = tmat[2][2] - tmp * tmat[2][0]; + tmat[3][2] = tmat[3][2] - tmp * tmat[3][0]; + tmat[4][2] = tmat[4][2] - tmp * tmat[4][0]; + tv[j][i][2] = tv[j][i][2] - tv[j][i][0] * tmp; + /* */ + tmp = tmp1 * tmat[0][3]; + tmat[1][3] = tmat[1][3] - tmp * tmat[1][0]; + tmat[2][3] = tmat[2][3] - tmp * tmat[2][0]; + tmat[3][3] = tmat[3][3] - tmp * tmat[3][0]; + tmat[4][3] = tmat[4][3] - tmp * tmat[4][0]; + tv[j][i][3] = tv[j][i][3] - tv[j][i][0] * tmp; + /* */ + tmp = tmp1 * tmat[0][4]; + tmat[1][4] = tmat[1][4] - tmp * tmat[1][0]; + tmat[2][4] = tmat[2][4] - tmp * tmat[2][0]; + tmat[3][4] = tmat[3][4] - tmp * tmat[3][0]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][0]; + tv[j][i][4] = tv[j][i][4] - tv[j][i][0] * tmp; + /* */ + tmp1 = 1.0 / tmat[1][1]; + tmp = tmp1 * tmat[1][2]; + tmat[2][2] = tmat[2][2] - tmp * tmat[2][1]; + tmat[3][2] = tmat[3][2] - tmp * tmat[3][1]; + tmat[4][2] = tmat[4][2] - tmp * tmat[4][1]; + tv[j][i][2] = tv[j][i][2] - tv[j][i][1] * tmp; + /* */ + tmp = tmp1 * tmat[1][3]; + tmat[2][3] = tmat[2][3] - tmp * tmat[2][1]; + tmat[3][3] = tmat[3][3] - tmp * tmat[3][1]; + tmat[4][3] = tmat[4][3] - tmp * tmat[4][1]; + 
tv[j][i][3] = tv[j][i][3] - tv[j][i][1] * tmp; + /* */ + tmp = tmp1 * tmat[1][4]; + tmat[2][4] = tmat[2][4] - tmp * tmat[2][1]; + tmat[3][4] = tmat[3][4] - tmp * tmat[3][1]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][1]; + tv[j][i][4] = tv[j][i][4] - tv[j][i][1] * tmp; + /* */ + tmp1 = 1.0 / tmat[2][2]; + tmp = tmp1 * tmat[2][3]; + tmat[3][3] = tmat[3][3] - tmp * tmat[3][2]; + tmat[4][3] = tmat[4][3] - tmp * tmat[4][2]; + tv[j][i][3] = tv[j][i][3] - tv[j][i][2] * tmp; + /* */ + tmp = tmp1 * tmat[2][4]; + tmat[3][4] = tmat[3][4] - tmp * tmat[3][2]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][2]; + tv[j][i][4] = tv[j][i][4] - tv[j][i][2] * tmp; + /* */ + tmp1 = 1.0 / tmat[3][3]; + tmp = tmp1 * tmat[3][4]; + tmat[4][4] = tmat[4][4] - tmp * tmat[4][3]; + tv[j][i][4] = tv[j][i][4] - tv[j][i][3] * tmp; + /* + * --------------------------------------------------------------------- + * back substitution + * --------------------------------------------------------------------- + */ + tv[j][i][4] = tv[j][i][4] / tmat[4][4]; + tv[j][i][3] = tv[j][i][3] - tmat[4][3] * tv[j][i][4]; + tv[j][i][3] = tv[j][i][3] / tmat[3][3]; + tv[j][i][2] = + tv[j][i][2] - tmat[3][2] * tv[j][i][3] - tmat[4][2] * tv[j][i][4]; + tv[j][i][2] = tv[j][i][2] / tmat[2][2]; + tv[j][i][1] = tv[j][i][1] - tmat[2][1] * tv[j][i][2] - + tmat[3][1] * tv[j][i][3] - tmat[4][1] * tv[j][i][4]; + tv[j][i][1] = tv[j][i][1] / tmat[1][1]; + tv[j][i][0] = tv[j][i][0] - tmat[1][0] * tv[j][i][1] - + tmat[2][0] * tv[j][i][2] - tmat[3][0] * tv[j][i][3] - + tmat[4][0] * tv[j][i][4]; + tv[j][i][0] = tv[j][i][0] / tmat[0][0]; + v[k][j][i][0] = v[k][j][i][0] - tv[j][i][0]; + v[k][j][i][1] = v[k][j][i][1] - tv[j][i][1]; + v[k][j][i][2] = v[k][j][i][2] - tv[j][i][2]; + v[k][j][i][3] = v[k][j][i][3] - tv[j][i][3]; + v[k][j][i][4] = v[k][j][i][4] - tv[j][i][4]; + } + + if(j != jend - 1) flag2[j + 1] = 0; + if(j != jst) flag2[j] = 1; + } +} + +void +domain() +{ + /* + * 
--------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + nx = nx0; + ny = ny0; + nz = nz0; + /* + * --------------------------------------------------------------------- + * check the sub-domain size + * --------------------------------------------------------------------- + */ + if((nx < 4) || (ny < 4) || (nz < 4)) + { + printf(" SUBDOMAIN SIZE IS TOO SMALL - \n" + " ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS\n" + " SO THAT NX, NY AND NZ ARE GREATER THAN OR EQUAL\n" + " TO 4 THEY ARE CURRENTLY%3d%3d%3d\n", + nx, ny, nz); + exit(EXIT_FAILURE); + } + if((nx > ISIZ1) || (ny > ISIZ2) || (nz > ISIZ3)) + { + printf(" SUBDOMAIN SIZE IS TOO LARGE - \n" + " ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS\n" + " SO THAT NX, NY AND NZ ARE LESS THAN OR EQUAL TO \n" + " ISIZ1, ISIZ2 AND ISIZ3 RESPECTIVELY. THEY ARE\n" + " CURRENTLYi%4d%4d%4d\n", + nx, ny, nz); + exit(EXIT_FAILURE); + } + /* + * --------------------------------------------------------------------- + * set up the start and end in i and j extents for all processors + * --------------------------------------------------------------------- + */ + ist = 1; + iend = nx - 1; + jst = 1; + jend = ny - 1; + ii1 = 1; + ii2 = nx0 - 1; + ji1 = 1; + ji2 = ny0 - 2; + ki1 = 2; + ki2 = nz0 - 1; +} + +/* + * --------------------------------------------------------------------- + * compute the right hand side based on exact solution + * --------------------------------------------------------------------- + */ +void +erhs() +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, k, m; + double xi, eta, zeta; + double q; + double u21, u31, u41; + double tmp; + double u21i, u31i, u41i, u51i; + double u21j, u31j, u41j, u51j; + double u21k, u31k, u41k, u51k; + double u21im1, u31im1, u41im1, 
u51im1; + double u21jm1, u31jm1, u41jm1, u51jm1; + double u21km1, u31km1, u41km1, u51km1; + double flux[ISIZ1][5]; + +#pragma omp for + for(k = 0; k < nz; k++) + { + for(j = 0; j < ny; j++) + { + for(i = 0; i < nx; i++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = 0.0; + } + } + } + } + +#pragma omp for + for(k = 0; k < nz; k++) + { + zeta = ((double) k) / (nz - 1); + for(j = 0; j < ny; j++) + { + eta = ((double) j) / (ny0 - 1); + for(i = 0; i < nx; i++) + { + xi = ((double) i) / (nx0 - 1); + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = + ce[0][m] + + (ce[1][m] + (ce[4][m] + (ce[7][m] + ce[10][m] * xi) * xi) * xi) * + xi + + (ce[2][m] + + (ce[5][m] + (ce[8][m] + ce[11][m] * eta) * eta) * eta) * + eta + + (ce[3][m] + + (ce[6][m] + (ce[9][m] + ce[12][m] * zeta) * zeta) * zeta) * + zeta; + } + } + } + } +/* + * --------------------------------------------------------------------- + * xi-direction flux differences + * --------------------------------------------------------------------- + */ +#pragma omp for + for(k = 1; k < nz - 1; k++) + { + for(j = jst; j < jend; j++) + { + for(i = 0; i < nx; i++) + { + flux[i][0] = rsd[k][j][i][1]; + u21 = rsd[k][j][i][1] / rsd[k][j][i][0]; + q = 0.50 * + (rsd[k][j][i][1] * rsd[k][j][i][1] + + rsd[k][j][i][2] * rsd[k][j][i][2] + + rsd[k][j][i][3] * rsd[k][j][i][3]) / + rsd[k][j][i][0]; + flux[i][1] = rsd[k][j][i][1] * u21 + C2 * (rsd[k][j][i][4] - q); + flux[i][2] = rsd[k][j][i][2] * u21; + flux[i][3] = rsd[k][j][i][3] * u21; + flux[i][4] = (C1 * rsd[k][j][i][4] - C2 * q) * u21; + } + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = + frct[k][j][i][m] - tx2 * (flux[i + 1][m] - flux[i - 1][m]); + } + } + for(i = ist; i < nx; i++) + { + tmp = 1.0 / rsd[k][j][i][0]; + u21i = tmp * rsd[k][j][i][1]; + u31i = tmp * rsd[k][j][i][2]; + u41i = tmp * rsd[k][j][i][3]; + u51i = tmp * rsd[k][j][i][4]; + tmp = 1.0 / rsd[k][j][i - 1][0]; + u21im1 = tmp * rsd[k][j][i - 1][1]; + u31im1 = tmp * rsd[k][j][i 
- 1][2]; + u41im1 = tmp * rsd[k][j][i - 1][3]; + u51im1 = tmp * rsd[k][j][i - 1][4]; + flux[i][1] = (4.0 / 3.0) * tx3 * (u21i - u21im1); + flux[i][2] = tx3 * (u31i - u31im1); + flux[i][3] = tx3 * (u41i - u41im1); + flux[i][4] = 0.50 * (1.0 - C1 * C5) * tx3 * + ((u21i * u21i + u31i * u31i + u41i * u41i) - + (u21im1 * u21im1 + u31im1 * u31im1 + u41im1 * u41im1)) + + (1.0 / 6.0) * tx3 * (u21i * u21i - u21im1 * u21im1) + + C1 * C5 * tx3 * (u51i - u51im1); + } + for(i = ist; i < iend; i++) + { + frct[k][j][i][0] = + frct[k][j][i][0] + dx1 * tx1 * + (rsd[k][j][i - 1][0] - 2.0 * rsd[k][j][i][0] + + rsd[k][j][i + 1][0]); + frct[k][j][i][1] = frct[k][j][i][1] + + tx3 * C3 * C4 * (flux[i + 1][1] - flux[i][1]) + + dx2 * tx1 * + (rsd[k][j][i - 1][1] - 2.0 * rsd[k][j][i][1] + + rsd[k][j][i + 1][1]); + frct[k][j][i][2] = frct[k][j][i][2] + + tx3 * C3 * C4 * (flux[i + 1][2] - flux[i][2]) + + dx3 * tx1 * + (rsd[k][j][i - 1][2] - 2.0 * rsd[k][j][i][2] + + rsd[k][j][i + 1][2]); + frct[k][j][i][3] = frct[k][j][i][3] + + tx3 * C3 * C4 * (flux[i + 1][3] - flux[i][3]) + + dx4 * tx1 * + (rsd[k][j][i - 1][3] - 2.0 * rsd[k][j][i][3] + + rsd[k][j][i + 1][3]); + frct[k][j][i][4] = frct[k][j][i][4] + + tx3 * C3 * C4 * (flux[i + 1][4] - flux[i][4]) + + dx5 * tx1 * + (rsd[k][j][i - 1][4] - 2.0 * rsd[k][j][i][4] + + rsd[k][j][i + 1][4]); + } + /* + * --------------------------------------------------------------------- + * fourth-order dissipation + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + frct[k][j][1][m] = + frct[k][j][1][m] - dssp * (+5.0 * rsd[k][j][1][m] - + 4.0 * rsd[k][j][2][m] + rsd[k][j][3][m]); + frct[k][j][2][m] = + frct[k][j][2][m] - + dssp * (-4.0 * rsd[k][j][1][m] + 6.0 * rsd[k][j][2][m] - + 4.0 * rsd[k][j][3][m] + rsd[k][j][4][m]); + } + for(i = 3; i < nx - 3; i++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = + frct[k][j][i][m] - + dssp * (rsd[k][j][i - 2][m] - 4.0 * rsd[k][j][i - 1][m] + + 6.0 * 
rsd[k][j][i][m] - 4.0 * rsd[k][j][i + 1][m] + + rsd[k][j][i + 2][m]); + } + } + for(m = 0; m < 5; m++) + { + frct[k][j][nx - 3][m] = + frct[k][j][nx - 3][m] - + dssp * (rsd[k][j][nx - 5][m] - 4.0 * rsd[k][j][nx - 4][m] + + 6.0 * rsd[k][j][nx - 3][m] - 4.0 * rsd[k][j][nx - 2][m]); + frct[k][j][nx - 2][m] = + frct[k][j][nx - 2][m] - + dssp * (rsd[k][j][nx - 4][m] - 4.0 * rsd[k][j][nx - 3][m] + + 5.0 * rsd[k][j][nx - 2][m]); + } + } + } +/* + * --------------------------------------------------------------------- + * eta-direction flux differences + * --------------------------------------------------------------------- + */ +#pragma omp for + for(k = 1; k < nz - 1; k++) + { + for(i = ist; i < iend; i++) + { + for(j = 0; j < ny; j++) + { + flux[j][0] = rsd[k][j][i][2]; + u31 = rsd[k][j][i][2] / rsd[k][j][i][0]; + q = 0.50 * + (rsd[k][j][i][1] * rsd[k][j][i][1] + + rsd[k][j][i][2] * rsd[k][j][i][2] + + rsd[k][j][i][3] * rsd[k][j][i][3]) / + rsd[k][j][i][0]; + flux[j][1] = rsd[k][j][i][1] * u31; + flux[j][2] = rsd[k][j][i][2] * u31 + C2 * (rsd[k][j][i][4] - q); + flux[j][3] = rsd[k][j][i][3] * u31; + flux[j][4] = (C1 * rsd[k][j][i][4] - C2 * q) * u31; + } + for(j = jst; j < jend; j++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = + frct[k][j][i][m] - ty2 * (flux[j + 1][m] - flux[j - 1][m]); + } + } + for(j = jst; j < ny; j++) + { + tmp = 1.0 / rsd[k][j][i][0]; + u21j = tmp * rsd[k][j][i][1]; + u31j = tmp * rsd[k][j][i][2]; + u41j = tmp * rsd[k][j][i][3]; + u51j = tmp * rsd[k][j][i][4]; + tmp = 1.0 / rsd[k][j - 1][i][0]; + u21jm1 = tmp * rsd[k][j - 1][i][1]; + u31jm1 = tmp * rsd[k][j - 1][i][2]; + u41jm1 = tmp * rsd[k][j - 1][i][3]; + u51jm1 = tmp * rsd[k][j - 1][i][4]; + flux[j][1] = ty3 * (u21j - u21jm1); + flux[j][2] = (4.0 / 3.0) * ty3 * (u31j - u31jm1); + flux[j][3] = ty3 * (u41j - u41jm1); + flux[j][4] = 0.50 * (1.0 - C1 * C5) * ty3 * + ((u21j * u21j + u31j * u31j + u41j * u41j) - + (u21jm1 * u21jm1 + u31jm1 * u31jm1 + u41jm1 * u41jm1)) + + (1.0 / 6.0) * 
ty3 * (u31j * u31j - u31jm1 * u31jm1) + + C1 * C5 * ty3 * (u51j - u51jm1); + } + for(j = jst; j < jend; j++) + { + frct[k][j][i][0] = + frct[k][j][i][0] + dy1 * ty1 * + (rsd[k][j - 1][i][0] - 2.0 * rsd[k][j][i][0] + + rsd[k][j + 1][i][0]); + frct[k][j][i][1] = frct[k][j][i][1] + + ty3 * C3 * C4 * (flux[j + 1][1] - flux[j][1]) + + dy2 * ty1 * + (rsd[k][j - 1][i][1] - 2.0 * rsd[k][j][i][1] + + rsd[k][j + 1][i][1]); + frct[k][j][i][2] = frct[k][j][i][2] + + ty3 * C3 * C4 * (flux[j + 1][2] - flux[j][2]) + + dy3 * ty1 * + (rsd[k][j - 1][i][2] - 2.0 * rsd[k][j][i][2] + + rsd[k][j + 1][i][2]); + frct[k][j][i][3] = frct[k][j][i][3] + + ty3 * C3 * C4 * (flux[j + 1][3] - flux[j][3]) + + dy4 * ty1 * + (rsd[k][j - 1][i][3] - 2.0 * rsd[k][j][i][3] + + rsd[k][j + 1][i][3]); + frct[k][j][i][4] = frct[k][j][i][4] + + ty3 * C3 * C4 * (flux[j + 1][4] - flux[j][4]) + + dy5 * ty1 * + (rsd[k][j - 1][i][4] - 2.0 * rsd[k][j][i][4] + + rsd[k][j + 1][i][4]); + } + /* + * --------------------------------------------------------------------- + * fourth-order dissipation + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + frct[k][1][i][m] = + frct[k][1][i][m] - dssp * (+5.0 * rsd[k][1][i][m] - + 4.0 * rsd[k][2][i][m] + rsd[k][3][i][m]); + frct[k][2][i][m] = + frct[k][2][i][m] - + dssp * (-4.0 * rsd[k][1][i][m] + 6.0 * rsd[k][2][i][m] - + 4.0 * rsd[k][3][i][m] + rsd[k][4][i][m]); + } + for(j = 3; j < ny - 3; j++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = + frct[k][j][i][m] - + dssp * (rsd[k][j - 2][i][m] - 4.0 * rsd[k][j - 1][i][m] + + 6.0 * rsd[k][j][i][m] - 4.0 * rsd[k][j + 1][i][m] + + rsd[k][j + 2][i][m]); + } + } + for(m = 0; m < 5; m++) + { + frct[k][ny - 3][i][m] = + frct[k][ny - 3][i][m] - + dssp * (rsd[k][ny - 5][i][m] - 4.0 * rsd[k][ny - 4][i][m] + + 6.0 * rsd[k][ny - 3][i][m] - 4.0 * rsd[k][ny - 2][i][m]); + frct[k][ny - 2][i][m] = + frct[k][ny - 2][i][m] - + dssp * (rsd[k][ny - 4][i][m] - 4.0 * rsd[k][ny - 
3][i][m] + + 5.0 * rsd[k][ny - 2][i][m]); + } + } + } +/* + * --------------------------------------------------------------------- + * zeta-direction flux differences + * --------------------------------------------------------------------- + */ +#pragma omp for + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + for(k = 0; k < nz; k++) + { + flux[k][0] = rsd[k][j][i][3]; + u41 = rsd[k][j][i][3] / rsd[k][j][i][0]; + q = 0.50 * + (rsd[k][j][i][1] * rsd[k][j][i][1] + + rsd[k][j][i][2] * rsd[k][j][i][2] + + rsd[k][j][i][3] * rsd[k][j][i][3]) / + rsd[k][j][i][0]; + flux[k][1] = rsd[k][j][i][1] * u41; + flux[k][2] = rsd[k][j][i][2] * u41; + flux[k][3] = rsd[k][j][i][3] * u41 + C2 * (rsd[k][j][i][4] - q); + flux[k][4] = (C1 * rsd[k][j][i][4] - C2 * q) * u41; + } + for(k = 1; k < nz - 1; k++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = + frct[k][j][i][m] - tz2 * (flux[k + 1][m] - flux[k - 1][m]); + } + } + for(k = 1; k < nz; k++) + { + tmp = 1.0 / rsd[k][j][i][0]; + u21k = tmp * rsd[k][j][i][1]; + u31k = tmp * rsd[k][j][i][2]; + u41k = tmp * rsd[k][j][i][3]; + u51k = tmp * rsd[k][j][i][4]; + tmp = 1.0 / rsd[k - 1][j][i][0]; + u21km1 = tmp * rsd[k - 1][j][i][1]; + u31km1 = tmp * rsd[k - 1][j][i][2]; + u41km1 = tmp * rsd[k - 1][j][i][3]; + u51km1 = tmp * rsd[k - 1][j][i][4]; + flux[k][1] = tz3 * (u21k - u21km1); + flux[k][2] = tz3 * (u31k - u31km1); + flux[k][3] = (4.0 / 3.0) * tz3 * (u41k - u41km1); + flux[k][4] = 0.50 * (1.0 - C1 * C5) * tz3 * + ((u21k * u21k + u31k * u31k + u41k * u41k) - + (u21km1 * u21km1 + u31km1 * u31km1 + u41km1 * u41km1)) + + (1.0 / 6.0) * tz3 * (u41k * u41k - u41km1 * u41km1) + + C1 * C5 * tz3 * (u51k - u51km1); + } + for(k = 1; k < nz - 1; k++) + { + frct[k][j][i][0] = + frct[k][j][i][0] + dz1 * tz1 * + (rsd[k + 1][j][i][0] - 2.0 * rsd[k][j][i][0] + + rsd[k - 1][j][i][0]); + frct[k][j][i][1] = frct[k][j][i][1] + + tz3 * C3 * C4 * (flux[k + 1][1] - flux[k][1]) + + dz2 * tz1 * + (rsd[k + 1][j][i][1] - 2.0 * 
rsd[k][j][i][1] + + rsd[k - 1][j][i][1]); + frct[k][j][i][2] = frct[k][j][i][2] + + tz3 * C3 * C4 * (flux[k + 1][2] - flux[k][2]) + + dz3 * tz1 * + (rsd[k + 1][j][i][2] - 2.0 * rsd[k][j][i][2] + + rsd[k - 1][j][i][2]); + frct[k][j][i][3] = frct[k][j][i][3] + + tz3 * C3 * C4 * (flux[k + 1][3] - flux[k][3]) + + dz4 * tz1 * + (rsd[k + 1][j][i][3] - 2.0 * rsd[k][j][i][3] + + rsd[k - 1][j][i][3]); + frct[k][j][i][4] = frct[k][j][i][4] + + tz3 * C3 * C4 * (flux[k + 1][4] - flux[k][4]) + + dz5 * tz1 * + (rsd[k + 1][j][i][4] - 2.0 * rsd[k][j][i][4] + + rsd[k - 1][j][i][4]); + } + /* + * --------------------------------------------------------------------- + * fourth-order dissipation + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + frct[1][j][i][m] = + frct[1][j][i][m] - dssp * (+5.0 * rsd[1][j][i][m] - + 4.0 * rsd[2][j][i][m] + rsd[3][j][i][m]); + frct[2][j][i][m] = + frct[2][j][i][m] - + dssp * (-4.0 * rsd[1][j][i][m] + 6.0 * rsd[2][j][i][m] - + 4.0 * rsd[3][j][i][m] + rsd[4][j][i][m]); + } + for(k = 3; k < nz - 3; k++) + { + for(m = 0; m < 5; m++) + { + frct[k][j][i][m] = + frct[k][j][i][m] - + dssp * (rsd[k - 2][j][i][m] - 4.0 * rsd[k - 1][j][i][m] + + 6.0 * rsd[k][j][i][m] - 4.0 * rsd[k + 1][j][i][m] + + rsd[k + 2][j][i][m]); + } + } + for(m = 0; m < 5; m++) + { + frct[nz - 3][j][i][m] = + frct[nz - 3][j][i][m] - + dssp * (rsd[nz - 5][j][i][m] - 4.0 * rsd[nz - 4][j][i][m] + + 6.0 * rsd[nz - 3][j][i][m] - 4.0 * rsd[nz - 2][j][i][m]); + frct[nz - 2][j][i][m] = + frct[nz - 2][j][i][m] - + dssp * (rsd[nz - 4][j][i][m] - 4.0 * rsd[nz - 3][j][i][m] + + 5.0 * rsd[nz - 2][j][i][m]); + } + } + } +} + +/* + * --------------------------------------------------------------------- + * compute the solution error + * --------------------------------------------------------------------- + */ +void +error() +{ + /* + * --------------------------------------------------------------------- + * local variables + * 
--------------------------------------------------------------------- + */ + int i, j, k, m; + double tmp; + double u000ijk[5]; + for(m = 0; m < 5; m++) + { + errnm[m] = 0.0; + } + for(k = 1; k < nz - 1; k++) + { + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + exact(i, j, k, u000ijk); + for(m = 0; m < 5; m++) + { + tmp = (u000ijk[m] - u[k][j][i][m]); + errnm[m] = errnm[m] + tmp * tmp; + } + } + } + } + for(m = 0; m < 5; m++) + { + errnm[m] = sqrt(errnm[m] / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2))); + } +} + +/* + * --------------------------------------------------------------------- + * compute the exact solution at (i,j,k) + * --------------------------------------------------------------------- + */ +void +exact(int i, int j, int k, double u000ijk[]) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int m; + double xi, eta, zeta; + xi = ((double) i) / (nx0 - 1); + eta = ((double) j) / (ny0 - 1); + zeta = ((double) k) / (nz - 1); + for(m = 0; m < 5; m++) + { + u000ijk[m] = + ce[0][m] + + (ce[1][m] + (ce[4][m] + (ce[7][m] + ce[10][m] * xi) * xi) * xi) * xi + + (ce[2][m] + (ce[5][m] + (ce[8][m] + ce[11][m] * eta) * eta) * eta) * eta + + (ce[3][m] + (ce[6][m] + (ce[9][m] + ce[12][m] * zeta) * zeta) * zeta) * zeta; + } +} + +/* + * --------------------------------------------------------------------- + * compute the lower triangular part of the jacobian matrix + * --------------------------------------------------------------------- + */ +void +jacld(int k) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j; + double r43; + double c1345; + double c34; + double tmp1, tmp2, tmp3; + r43 = (4.0 / 3.0); + c1345 = C1 * C3 * C4 * C5; + c34 = C3 * C4; + +#pragma omp for nowait 
schedule(static) + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + /* + * --------------------------------------------------------------------- + * form the block daigonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k][j][i]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + d[j][i][0][0] = 1.0 + dt * 2.0 * (tx1 * dx1 + ty1 * dy1 + tz1 * dz1); + d[j][i][1][0] = 0.0; + d[j][i][2][0] = 0.0; + d[j][i][3][0] = 0.0; + d[j][i][4][0] = 0.0; + d[j][i][0][1] = + -dt * 2.0 * (tx1 * r43 + ty1 + tz1) * c34 * tmp2 * u[k][j][i][1]; + d[j][i][1][1] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 * r43 + ty1 + tz1) + + dt * 2.0 * (tx1 * dx2 + ty1 * dy2 + tz1 * dz2); + d[j][i][2][1] = 0.0; + d[j][i][3][1] = 0.0; + d[j][i][4][1] = 0.0; + d[j][i][0][2] = + -dt * 2.0 * (tx1 + ty1 * r43 + tz1) * c34 * tmp2 * u[k][j][i][2]; + d[j][i][1][2] = 0.0; + d[j][i][2][2] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 * r43 + tz1) + + dt * 2.0 * (tx1 * dx3 + ty1 * dy3 + tz1 * dz3); + d[j][i][3][2] = 0.0; + d[j][i][4][2] = 0.0; + d[j][i][0][3] = + -dt * 2.0 * (tx1 + ty1 + tz1 * r43) * c34 * tmp2 * u[k][j][i][3]; + d[j][i][1][3] = 0.0; + d[j][i][2][3] = 0.0; + d[j][i][3][3] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 + tz1 * r43) + + dt * 2.0 * (tx1 * dx4 + ty1 * dy4 + tz1 * dz4); + d[j][i][4][3] = 0.0; + d[j][i][0][4] = -dt * 2.0 * + (((tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + + tz1 * (c34 - c1345)) * + (u[k][j][i][1] * u[k][j][i][1]) + + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + + tz1 * (c34 - c1345)) * + (u[k][j][i][2] * u[k][j][i][2]) + + (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + + tz1 * (r43 * c34 - c1345)) * + (u[k][j][i][3] * u[k][j][i][3])) * + tmp3 + + (tx1 + ty1 + tz1) * c1345 * tmp2 * u[k][j][i][4]); + d[j][i][1][4] = + dt * 2.0 * tmp2 * u[k][j][i][1] * + (tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)); + d[j][i][2][4] = + dt * 2.0 * tmp2 * u[k][j][i][2] * + (tx1 * (c34 - c1345) + ty1 * (r43 * 
c34 - c1345) + tz1 * (c34 - c1345)); + d[j][i][3][4] = + dt * 2.0 * tmp2 * u[k][j][i][3] * + (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)); + d[j][i][4][4] = 1.0 + dt * 2.0 * (tx1 + ty1 + tz1) * c1345 * tmp1 + + dt * 2.0 * (tx1 * dx5 + ty1 * dy5 + tz1 * dz5); + /* + * --------------------------------------------------------------------- + * form the first block sub-diagonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k - 1][j][i]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + a[j][i][0][0] = -dt * tz1 * dz1; + a[j][i][1][0] = 0.0; + a[j][i][2][0] = 0.0; + a[j][i][3][0] = -dt * tz2; + a[j][i][4][0] = 0.0; + a[j][i][0][1] = + -dt * tz2 * (-(u[k - 1][j][i][1] * u[k - 1][j][i][3]) * tmp2) - + dt * tz1 * (-c34 * tmp2 * u[k - 1][j][i][1]); + a[j][i][1][1] = -dt * tz2 * (u[k - 1][j][i][3] * tmp1) - + dt * tz1 * c34 * tmp1 - dt * tz1 * dz2; + a[j][i][2][1] = 0.0; + a[j][i][3][1] = -dt * tz2 * (u[k - 1][j][i][1] * tmp1); + a[j][i][4][1] = 0.0; + a[j][i][0][2] = + -dt * tz2 * (-(u[k - 1][j][i][2] * u[k - 1][j][i][3]) * tmp2) - + dt * tz1 * (-c34 * tmp2 * u[k - 1][j][i][2]); + a[j][i][1][2] = 0.0; + a[j][i][2][2] = -dt * tz2 * (u[k - 1][j][i][3] * tmp1) - + dt * tz1 * (c34 * tmp1) - dt * tz1 * dz3; + a[j][i][3][2] = -dt * tz2 * (u[k - 1][j][i][2] * tmp1); + a[j][i][4][2] = 0.0; + a[j][i][0][3] = + -dt * tz2 * + (-(u[k - 1][j][i][3] * tmp1) * (u[k - 1][j][i][3] * tmp1) + + C2 * qs[k - 1][j][i] * tmp1) - + dt * tz1 * (-r43 * c34 * tmp2 * u[k - 1][j][i][3]); + a[j][i][1][3] = -dt * tz2 * (-C2 * (u[k - 1][j][i][1] * tmp1)); + a[j][i][2][3] = -dt * tz2 * (-C2 * (u[k - 1][j][i][2] * tmp1)); + a[j][i][3][3] = -dt * tz2 * (2.0 - C2) * (u[k - 1][j][i][3] * tmp1) - + dt * tz1 * (r43 * c34 * tmp1) - dt * tz1 * dz4; + a[j][i][4][3] = -dt * tz2 * C2; + a[j][i][0][4] = + -dt * tz2 * + ((C2 * 2.0 * qs[k - 1][j][i] - C1 * u[k - 1][j][i][4]) * + u[k - 1][j][i][3] * tmp2) - + dt * tz1 * + (-(c34 - c1345) * tmp3 * (u[k 
- 1][j][i][1] * u[k - 1][j][i][1]) - + (c34 - c1345) * tmp3 * (u[k - 1][j][i][2] * u[k - 1][j][i][2]) - + (r43 * c34 - c1345) * tmp3 * + (u[k - 1][j][i][3] * u[k - 1][j][i][3]) - + c1345 * tmp2 * u[k - 1][j][i][4]); + a[j][i][1][4] = + -dt * tz2 * (-C2 * (u[k - 1][j][i][1] * u[k - 1][j][i][3]) * tmp2) - + dt * tz1 * (c34 - c1345) * tmp2 * u[k - 1][j][i][1]; + a[j][i][2][4] = + -dt * tz2 * (-C2 * (u[k - 1][j][i][2] * u[k - 1][j][i][3]) * tmp2) - + dt * tz1 * (c34 - c1345) * tmp2 * u[k - 1][j][i][2]; + a[j][i][3][4] = -dt * tz2 * + (C1 * (u[k - 1][j][i][4] * tmp1) - + C2 * (qs[k - 1][j][i] * tmp1 + + u[k - 1][j][i][3] * u[k - 1][j][i][3] * tmp2)) - + dt * tz1 * (r43 * c34 - c1345) * tmp2 * u[k - 1][j][i][3]; + a[j][i][4][4] = -dt * tz2 * (C1 * (u[k - 1][j][i][3] * tmp1)) - + dt * tz1 * c1345 * tmp1 - dt * tz1 * dz5; + /* + * --------------------------------------------------------------------- + * form the second block sub-diagonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k][j - 1][i]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + b[j][i][0][0] = -dt * ty1 * dy1; + b[j][i][1][0] = 0.0; + b[j][i][2][0] = -dt * ty2; + b[j][i][3][0] = 0.0; + b[j][i][4][0] = 0.0; + b[j][i][0][1] = + -dt * ty2 * (-(u[k][j - 1][i][1] * u[k][j - 1][i][2]) * tmp2) - + dt * ty1 * (-c34 * tmp2 * u[k][j - 1][i][1]); + b[j][i][1][1] = -dt * ty2 * (u[k][j - 1][i][2] * tmp1) - + dt * ty1 * (c34 * tmp1) - dt * ty1 * dy2; + b[j][i][2][1] = -dt * ty2 * (u[k][j - 1][i][1] * tmp1); + b[j][i][3][1] = 0.0; + b[j][i][4][1] = 0.0; + b[j][i][0][2] = + -dt * ty2 * + (-(u[k][j - 1][i][2] * tmp1) * (u[k][j - 1][i][2] * tmp1) + + C2 * (qs[k][j - 1][i] * tmp1)) - + dt * ty1 * (-r43 * c34 * tmp2 * u[k][j - 1][i][2]); + b[j][i][1][2] = -dt * ty2 * (-C2 * (u[k][j - 1][i][1] * tmp1)); + b[j][i][2][2] = -dt * ty2 * ((2.0 - C2) * (u[k][j - 1][i][2] * tmp1)) - + dt * ty1 * (r43 * c34 * tmp1) - dt * ty1 * dy3; + b[j][i][3][2] = -dt * ty2 * (-C2 * (u[k][j - 1][i][3] * 
tmp1)); + b[j][i][4][2] = -dt * ty2 * C2; + b[j][i][0][3] = + -dt * ty2 * (-(u[k][j - 1][i][2] * u[k][j - 1][i][3]) * tmp2) - + dt * ty1 * (-c34 * tmp2 * u[k][j - 1][i][3]); + b[j][i][1][3] = 0.0; + b[j][i][2][3] = -dt * ty2 * (u[k][j - 1][i][3] * tmp1); + b[j][i][3][3] = -dt * ty2 * (u[k][j - 1][i][2] * tmp1) - + dt * ty1 * (c34 * tmp1) - dt * ty1 * dy4; + b[j][i][4][3] = 0.0; + b[j][i][0][4] = + -dt * ty2 * + ((C2 * 2.0 * qs[k][j - 1][i] - C1 * u[k][j - 1][i][4]) * + (u[k][j - 1][i][2] * tmp2)) - + dt * ty1 * + (-(c34 - c1345) * tmp3 * (u[k][j - 1][i][1] * u[k][j - 1][i][1]) - + (r43 * c34 - c1345) * tmp3 * + (u[k][j - 1][i][2] * u[k][j - 1][i][2]) - + (c34 - c1345) * tmp3 * (u[k][j - 1][i][3] * u[k][j - 1][i][3]) - + c1345 * tmp2 * u[k][j - 1][i][4]); + b[j][i][1][4] = + -dt * ty2 * (-C2 * (u[k][j - 1][i][1] * u[k][j - 1][i][2]) * tmp2) - + dt * ty1 * (c34 - c1345) * tmp2 * u[k][j - 1][i][1]; + b[j][i][2][4] = -dt * ty2 * + (C1 * (u[k][j - 1][i][4] * tmp1) - + C2 * (qs[k][j - 1][i] * tmp1 + + u[k][j - 1][i][2] * u[k][j - 1][i][2] * tmp2)) - + dt * ty1 * (r43 * c34 - c1345) * tmp2 * u[k][j - 1][i][2]; + b[j][i][3][4] = + -dt * ty2 * (-C2 * (u[k][j - 1][i][2] * u[k][j - 1][i][3]) * tmp2) - + dt * ty1 * (c34 - c1345) * tmp2 * u[k][j - 1][i][3]; + b[j][i][4][4] = -dt * ty2 * (C1 * (u[k][j - 1][i][2] * tmp1)) - + dt * ty1 * c1345 * tmp1 - dt * ty1 * dy5; + /* + * --------------------------------------------------------------------- + * form the third block sub-diagonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k][j][i - 1]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + c[j][i][0][0] = -dt * tx1 * dx1; + c[j][i][1][0] = -dt * tx2; + c[j][i][2][0] = 0.0; + c[j][i][3][0] = 0.0; + c[j][i][4][0] = 0.0; + c[j][i][0][1] = + -dt * tx2 * + (-(u[k][j][i - 1][1] * tmp1) * (u[k][j][i - 1][1] * tmp1) + + C2 * qs[k][j][i - 1] * tmp1) - + dt * tx1 * (-r43 * c34 * tmp2 * u[k][j][i - 1][1]); + c[j][i][1][1] = -dt * tx2 * ((2.0 - C2) 
* (u[k][j][i - 1][1] * tmp1)) - + dt * tx1 * (r43 * c34 * tmp1) - dt * tx1 * dx2; + c[j][i][2][1] = -dt * tx2 * (-C2 * (u[k][j][i - 1][2] * tmp1)); + c[j][i][3][1] = -dt * tx2 * (-C2 * (u[k][j][i - 1][3] * tmp1)); + c[j][i][4][1] = -dt * tx2 * C2; + c[j][i][0][2] = + -dt * tx2 * (-(u[k][j][i - 1][1] * u[k][j][i - 1][2]) * tmp2) - + dt * tx1 * (-c34 * tmp2 * u[k][j][i - 1][2]); + c[j][i][1][2] = -dt * tx2 * (u[k][j][i - 1][2] * tmp1); + c[j][i][2][2] = -dt * tx2 * (u[k][j][i - 1][1] * tmp1) - + dt * tx1 * (c34 * tmp1) - dt * tx1 * dx3; + c[j][i][3][2] = 0.0; + c[j][i][4][2] = 0.0; + c[j][i][0][3] = + -dt * tx2 * (-(u[k][j][i - 1][1] * u[k][j][i - 1][3]) * tmp2) - + dt * tx1 * (-c34 * tmp2 * u[k][j][i - 1][3]); + c[j][i][1][3] = -dt * tx2 * (u[k][j][i - 1][3] * tmp1); + c[j][i][2][3] = 0.0; + c[j][i][3][3] = -dt * tx2 * (u[k][j][i - 1][1] * tmp1) - + dt * tx1 * (c34 * tmp1) - dt * tx1 * dx4; + c[j][i][4][3] = 0.0; + c[j][i][0][4] = + -dt * tx2 * + ((C2 * 2.0 * qs[k][j][i - 1] - C1 * u[k][j][i - 1][4]) * + u[k][j][i - 1][1] * tmp2) - + dt * tx1 * + (-(r43 * c34 - c1345) * tmp3 * + (u[k][j][i - 1][1] * u[k][j][i - 1][1]) - + (c34 - c1345) * tmp3 * (u[k][j][i - 1][2] * u[k][j][i - 1][2]) - + (c34 - c1345) * tmp3 * (u[k][j][i - 1][3] * u[k][j][i - 1][3]) - + c1345 * tmp2 * u[k][j][i - 1][4]); + c[j][i][1][4] = -dt * tx2 * + (C1 * (u[k][j][i - 1][4] * tmp1) - + C2 * (u[k][j][i - 1][1] * u[k][j][i - 1][1] * tmp2 + + qs[k][j][i - 1] * tmp1)) - + dt * tx1 * (r43 * c34 - c1345) * tmp2 * u[k][j][i - 1][1]; + c[j][i][2][4] = + -dt * tx2 * (-C2 * (u[k][j][i - 1][2] * u[k][j][i - 1][1]) * tmp2) - + dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i - 1][2]; + c[j][i][3][4] = + -dt * tx2 * (-C2 * (u[k][j][i - 1][3] * u[k][j][i - 1][1]) * tmp2) - + dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i - 1][3]; + c[j][i][4][4] = -dt * tx2 * (C1 * (u[k][j][i - 1][1] * tmp1)) - + dt * tx1 * c1345 * tmp1 - dt * tx1 * dx5; + } + } +} + +/* + * 
--------------------------------------------------------------------- + * compute the upper triangular part of the jacobian matrix + * --------------------------------------------------------------------- + */ +void +jacu(int k) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j; + double r43; + double c1345; + double c34; + double tmp1, tmp2, tmp3; + r43 = (4.0 / 3.0); + c1345 = C1 * C3 * C4 * C5; + c34 = C3 * C4; + +#pragma omp for nowait schedule(static) + for(j = jend - 1; j >= jst; j--) + { + for(i = iend - 1; i >= ist; i--) + { + /* + * --------------------------------------------------------------------- + * form the block daigonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k][j][i]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + d[j][i][0][0] = 1.0 + dt * 2.0 * (tx1 * dx1 + ty1 * dy1 + tz1 * dz1); + d[j][i][1][0] = 0.0; + d[j][i][2][0] = 0.0; + d[j][i][3][0] = 0.0; + d[j][i][4][0] = 0.0; + d[j][i][0][1] = + dt * 2.0 * (-tx1 * r43 - ty1 - tz1) * (c34 * tmp2 * u[k][j][i][1]); + d[j][i][1][1] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 * r43 + ty1 + tz1) + + dt * 2.0 * (tx1 * dx2 + ty1 * dy2 + tz1 * dz2); + d[j][i][2][1] = 0.0; + d[j][i][3][1] = 0.0; + d[j][i][4][1] = 0.0; + d[j][i][0][2] = + dt * 2.0 * (-tx1 - ty1 * r43 - tz1) * (c34 * tmp2 * u[k][j][i][2]); + d[j][i][1][2] = 0.0; + d[j][i][2][2] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 * r43 + tz1) + + dt * 2.0 * (tx1 * dx3 + ty1 * dy3 + tz1 * dz3); + d[j][i][3][2] = 0.0; + d[j][i][4][2] = 0.0; + d[j][i][0][3] = + dt * 2.0 * (-tx1 - ty1 - tz1 * r43) * (c34 * tmp2 * u[k][j][i][3]); + d[j][i][1][3] = 0.0; + d[j][i][2][3] = 0.0; + d[j][i][3][3] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 + tz1 * r43) + + dt * 2.0 * (tx1 * dx4 + ty1 * dy4 + tz1 * dz4); + d[j][i][4][3] = 0.0; + d[j][i][0][4] = -dt * 2.0 * + (((tx1 * (r43 * c34 - 
c1345) + ty1 * (c34 - c1345) + + tz1 * (c34 - c1345)) * + (u[k][j][i][1] * u[k][j][i][1]) + + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + + tz1 * (c34 - c1345)) * + (u[k][j][i][2] * u[k][j][i][2]) + + (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + + tz1 * (r43 * c34 - c1345)) * + (u[k][j][i][3] * u[k][j][i][3])) * + tmp3 + + (tx1 + ty1 + tz1) * c1345 * tmp2 * u[k][j][i][4]); + d[j][i][1][4] = + dt * 2.0 * + (tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)) * + tmp2 * u[k][j][i][1]; + d[j][i][2][4] = + dt * 2.0 * + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)) * + tmp2 * u[k][j][i][2]; + d[j][i][3][4] = + dt * 2.0 * + (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)) * + tmp2 * u[k][j][i][3]; + d[j][i][4][4] = 1.0 + dt * 2.0 * (tx1 + ty1 + tz1) * c1345 * tmp1 + + dt * 2.0 * (tx1 * dx5 + ty1 * dy5 + tz1 * dz5); + /* + * --------------------------------------------------------------------- + * form the first block sub-diagonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k][j][i + 1]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + a[j][i][0][0] = -dt * tx1 * dx1; + a[j][i][1][0] = dt * tx2; + a[j][i][2][0] = 0.0; + a[j][i][3][0] = 0.0; + a[j][i][4][0] = 0.0; + a[j][i][0][1] = + dt * tx2 * + (-(u[k][j][i + 1][1] * tmp1) * (u[k][j][i + 1][1] * tmp1) + + C2 * qs[k][j][i + 1] * tmp1) - + dt * tx1 * (-r43 * c34 * tmp2 * u[k][j][i + 1][1]); + a[j][i][1][1] = dt * tx2 * ((2.0 - C2) * (u[k][j][i + 1][1] * tmp1)) - + dt * tx1 * (r43 * c34 * tmp1) - dt * tx1 * dx2; + a[j][i][2][1] = dt * tx2 * (-C2 * (u[k][j][i + 1][2] * tmp1)); + a[j][i][3][1] = dt * tx2 * (-C2 * (u[k][j][i + 1][3] * tmp1)); + a[j][i][4][1] = dt * tx2 * C2; + a[j][i][0][2] = dt * tx2 * (-(u[k][j][i + 1][1] * u[k][j][i + 1][2]) * tmp2) - + dt * tx1 * (-c34 * tmp2 * u[k][j][i + 1][2]); + a[j][i][1][2] = dt * tx2 * (u[k][j][i + 1][2] * tmp1); + a[j][i][2][2] = dt * tx2 * (u[k][j][i + 1][1] * 
tmp1) - + dt * tx1 * (c34 * tmp1) - dt * tx1 * dx3; + a[j][i][3][2] = 0.0; + a[j][i][4][2] = 0.0; + a[j][i][0][3] = dt * tx2 * (-(u[k][j][i + 1][1] * u[k][j][i + 1][3]) * tmp2) - + dt * tx1 * (-c34 * tmp2 * u[k][j][i + 1][3]); + a[j][i][1][3] = dt * tx2 * (u[k][j][i + 1][3] * tmp1); + a[j][i][2][3] = 0.0; + a[j][i][3][3] = dt * tx2 * (u[k][j][i + 1][1] * tmp1) - + dt * tx1 * (c34 * tmp1) - dt * tx1 * dx4; + a[j][i][4][3] = 0.0; + a[j][i][0][4] = + dt * tx2 * + ((C2 * 2.0 * qs[k][j][i + 1] - C1 * u[k][j][i + 1][4]) * + (u[k][j][i + 1][1] * tmp2)) - + dt * tx1 * + (-(r43 * c34 - c1345) * tmp3 * + (u[k][j][i + 1][1] * u[k][j][i + 1][1]) - + (c34 - c1345) * tmp3 * (u[k][j][i + 1][2] * u[k][j][i + 1][2]) - + (c34 - c1345) * tmp3 * (u[k][j][i + 1][3] * u[k][j][i + 1][3]) - + c1345 * tmp2 * u[k][j][i + 1][4]); + a[j][i][1][4] = dt * tx2 * + (C1 * (u[k][j][i + 1][4] * tmp1) - + C2 * (u[k][j][i + 1][1] * u[k][j][i + 1][1] * tmp2 + + qs[k][j][i + 1] * tmp1)) - + dt * tx1 * (r43 * c34 - c1345) * tmp2 * u[k][j][i + 1][1]; + a[j][i][2][4] = + dt * tx2 * (-C2 * (u[k][j][i + 1][2] * u[k][j][i + 1][1]) * tmp2) - + dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i + 1][2]; + a[j][i][3][4] = + dt * tx2 * (-C2 * (u[k][j][i + 1][3] * u[k][j][i + 1][1]) * tmp2) - + dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i + 1][3]; + a[j][i][4][4] = dt * tx2 * (C1 * (u[k][j][i + 1][1] * tmp1)) - + dt * tx1 * c1345 * tmp1 - dt * tx1 * dx5; + /* + * --------------------------------------------------------------------- + * form the second block sub-diagonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k][j + 1][i]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + b[j][i][0][0] = -dt * ty1 * dy1; + b[j][i][1][0] = 0.0; + b[j][i][2][0] = dt * ty2; + b[j][i][3][0] = 0.0; + b[j][i][4][0] = 0.0; + b[j][i][0][1] = dt * ty2 * (-(u[k][j + 1][i][1] * u[k][j + 1][i][2]) * tmp2) - + dt * ty1 * (-c34 * tmp2 * u[k][j + 1][i][1]); + b[j][i][1][1] = dt * ty2 * (u[k][j + 1][i][2] 
* tmp1) - + dt * ty1 * (c34 * tmp1) - dt * ty1 * dy2; + b[j][i][2][1] = dt * ty2 * (u[k][j + 1][i][1] * tmp1); + b[j][i][3][1] = 0.0; + b[j][i][4][1] = 0.0; + b[j][i][0][2] = + dt * ty2 * + (-(u[k][j + 1][i][2] * tmp1) * (u[k][j + 1][i][2] * tmp1) + + C2 * (qs[k][j + 1][i] * tmp1)) - + dt * ty1 * (-r43 * c34 * tmp2 * u[k][j + 1][i][2]); + b[j][i][1][2] = dt * ty2 * (-C2 * (u[k][j + 1][i][1] * tmp1)); + b[j][i][2][2] = dt * ty2 * ((2.0 - C2) * (u[k][j + 1][i][2] * tmp1)) - + dt * ty1 * (r43 * c34 * tmp1) - dt * ty1 * dy3; + b[j][i][3][2] = dt * ty2 * (-C2 * (u[k][j + 1][i][3] * tmp1)); + b[j][i][4][2] = dt * ty2 * C2; + b[j][i][0][3] = dt * ty2 * (-(u[k][j + 1][i][2] * u[k][j + 1][i][3]) * tmp2) - + dt * ty1 * (-c34 * tmp2 * u[k][j + 1][i][3]); + b[j][i][1][3] = 0.0; + b[j][i][2][3] = dt * ty2 * (u[k][j + 1][i][3] * tmp1); + b[j][i][3][3] = dt * ty2 * (u[k][j + 1][i][2] * tmp1) - + dt * ty1 * (c34 * tmp1) - dt * ty1 * dy4; + b[j][i][4][3] = 0.0; + b[j][i][0][4] = + dt * ty2 * + ((C2 * 2.0 * qs[k][j + 1][i] - C1 * u[k][j + 1][i][4]) * + (u[k][j + 1][i][2] * tmp2)) - + dt * ty1 * + (-(c34 - c1345) * tmp3 * (u[k][j + 1][i][1] * u[k][j + 1][i][1]) - + (r43 * c34 - c1345) * tmp3 * + (u[k][j + 1][i][2] * u[k][j + 1][i][2]) - + (c34 - c1345) * tmp3 * (u[k][j + 1][i][3] * u[k][j + 1][i][3]) - + c1345 * tmp2 * u[k][j + 1][i][4]); + b[j][i][1][4] = + dt * ty2 * (-C2 * (u[k][j + 1][i][1] * u[k][j + 1][i][2]) * tmp2) - + dt * ty1 * (c34 - c1345) * tmp2 * u[k][j + 1][i][1]; + b[j][i][2][4] = dt * ty2 * + (C1 * (u[k][j + 1][i][4] * tmp1) - + C2 * (qs[k][j + 1][i] * tmp1 + + u[k][j + 1][i][2] * u[k][j + 1][i][2] * tmp2)) - + dt * ty1 * (r43 * c34 - c1345) * tmp2 * u[k][j + 1][i][2]; + b[j][i][3][4] = + dt * ty2 * (-C2 * (u[k][j + 1][i][2] * u[k][j + 1][i][3]) * tmp2) - + dt * ty1 * (c34 - c1345) * tmp2 * u[k][j + 1][i][3]; + b[j][i][4][4] = dt * ty2 * (C1 * (u[k][j + 1][i][2] * tmp1)) - + dt * ty1 * c1345 * tmp1 - dt * ty1 * dy5; + /* + * 
--------------------------------------------------------------------- + * form the third block sub-diagonal + * --------------------------------------------------------------------- + */ + tmp1 = rho_i[k + 1][j][i]; + tmp2 = tmp1 * tmp1; + tmp3 = tmp1 * tmp2; + c[j][i][0][0] = -dt * tz1 * dz1; + c[j][i][1][0] = 0.0; + c[j][i][2][0] = 0.0; + c[j][i][3][0] = dt * tz2; + c[j][i][4][0] = 0.0; + c[j][i][0][1] = dt * tz2 * (-(u[k + 1][j][i][1] * u[k + 1][j][i][3]) * tmp2) - + dt * tz1 * (-c34 * tmp2 * u[k + 1][j][i][1]); + c[j][i][1][1] = dt * tz2 * (u[k + 1][j][i][3] * tmp1) - + dt * tz1 * c34 * tmp1 - dt * tz1 * dz2; + c[j][i][2][1] = 0.0; + c[j][i][3][1] = dt * tz2 * (u[k + 1][j][i][1] * tmp1); + c[j][i][4][1] = 0.0; + c[j][i][0][2] = dt * tz2 * (-(u[k + 1][j][i][2] * u[k + 1][j][i][3]) * tmp2) - + dt * tz1 * (-c34 * tmp2 * u[k + 1][j][i][2]); + c[j][i][1][2] = 0.0; + c[j][i][2][2] = dt * tz2 * (u[k + 1][j][i][3] * tmp1) - + dt * tz1 * (c34 * tmp1) - dt * tz1 * dz3; + c[j][i][3][2] = dt * tz2 * (u[k + 1][j][i][2] * tmp1); + c[j][i][4][2] = 0.0; + c[j][i][0][3] = + dt * tz2 * + (-(u[k + 1][j][i][3] * tmp1) * (u[k + 1][j][i][3] * tmp1) + + C2 * (qs[k + 1][j][i] * tmp1)) - + dt * tz1 * (-r43 * c34 * tmp2 * u[k + 1][j][i][3]); + c[j][i][1][3] = dt * tz2 * (-C2 * (u[k + 1][j][i][1] * tmp1)); + c[j][i][2][3] = dt * tz2 * (-C2 * (u[k + 1][j][i][2] * tmp1)); + c[j][i][3][3] = dt * tz2 * (2.0 - C2) * (u[k + 1][j][i][3] * tmp1) - + dt * tz1 * (r43 * c34 * tmp1) - dt * tz1 * dz4; + c[j][i][4][3] = dt * tz2 * C2; + c[j][i][0][4] = + dt * tz2 * + ((C2 * 2.0 * qs[k + 1][j][i] - C1 * u[k + 1][j][i][4]) * + (u[k + 1][j][i][3] * tmp2)) - + dt * tz1 * + (-(c34 - c1345) * tmp3 * (u[k + 1][j][i][1] * u[k + 1][j][i][1]) - + (c34 - c1345) * tmp3 * (u[k + 1][j][i][2] * u[k + 1][j][i][2]) - + (r43 * c34 - c1345) * tmp3 * + (u[k + 1][j][i][3] * u[k + 1][j][i][3]) - + c1345 * tmp2 * u[k + 1][j][i][4]); + c[j][i][1][4] = + dt * tz2 * (-C2 * (u[k + 1][j][i][1] * u[k + 1][j][i][3]) * tmp2) - + dt 
* tz1 * (c34 - c1345) * tmp2 * u[k + 1][j][i][1]; + c[j][i][2][4] = + dt * tz2 * (-C2 * (u[k + 1][j][i][2] * u[k + 1][j][i][3]) * tmp2) - + dt * tz1 * (c34 - c1345) * tmp2 * u[k + 1][j][i][2]; + c[j][i][3][4] = dt * tz2 * + (C1 * (u[k + 1][j][i][4] * tmp1) - + C2 * (qs[k + 1][j][i] * tmp1 + + u[k + 1][j][i][3] * u[k + 1][j][i][3] * tmp2)) - + dt * tz1 * (r43 * c34 - c1345) * tmp2 * u[k + 1][j][i][3]; + c[j][i][4][4] = dt * tz2 * (C1 * (u[k + 1][j][i][3] * tmp1)) - + dt * tz1 * c1345 * tmp1 - dt * tz1 * dz5; + } + } +} + +/* + * --------------------------------------------------------------------- + * to compute the l2-norm of vector v. + * --------------------------------------------------------------------- + * to improve cache performance, second two dimensions padded by 1 + * for even number sizes only. Only needed in v. + * --------------------------------------------------------------------- + */ +void +l2norm(int nx0, int ny0, int nz0, int ist, int iend, int jst, int jend, + double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], double sum[5]) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, k, m; + double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0; + +#pragma omp single + for(m = 0; m < 5; m++) + { + sum[m] = 0.0; + } + +#pragma omp for nowait + for(k = 1; k < nz0 - 1; k++) + { + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + sum0 = sum0 + v[i][j][k][0] * v[i][j][k][0]; + sum1 = sum1 + v[i][j][k][1] * v[i][j][k][1]; + sum2 = sum2 + v[i][j][k][2] * v[i][j][k][2]; + sum3 = sum3 + v[i][j][k][3] * v[i][j][k][3]; + sum4 = sum4 + v[i][j][k][4] * v[i][j][k][4]; + } + } + } + +#pragma omp critical + { + sum[0] += sum0; + sum[1] += sum1; + sum[2] += sum2; + sum[3] += sum3; + sum[4] += sum4; + } +#pragma omp barrier + +#pragma omp single + for(m = 0; m < 5; m++) + { + sum[m] = sqrt(sum[m] / 
((nx0 - 2) * (ny0 - 2) * (nz0 - 2))); + } +} + +void +pintgr() +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, k; + int ibeg, ifin, ifin1; + int jbeg, jfin, jfin1; + double phi1[ISIZ3 + 2][ISIZ2 + 2]; + double phi2[ISIZ3 + 2][ISIZ2 + 2]; + double frc1, frc2, frc3; + /* + * --------------------------------------------------------------------- + * set up the sub-domains for integeration in each processor + * --------------------------------------------------------------------- + */ + ibeg = ii1; + ifin = ii2; + jbeg = ji1; + jfin = ji2; + ifin1 = ifin - 1; + jfin1 = jfin - 1; + /* + * --------------------------------------------------------------------- + * initialize + * --------------------------------------------------------------------- + */ + for(i = 0; i <= ISIZ2 + 1; i++) + { + for(k = 0; k <= ISIZ3 + 1; k++) + { + phi1[k][i] = 0.0; + phi2[k][i] = 0.0; + } + } + for(j = jbeg; j < jfin; j++) + { + for(i = ibeg; i < ifin; i++) + { + k = ki1; + phi1[j][i] = C2 * (u[k][j][i][4] - 0.50 * + (u[k][j][i][1] * u[k][j][i][1] + + u[k][j][i][2] * u[k][j][i][2] + + u[k][j][i][3] * u[k][j][i][3]) / + u[k][j][i][0]); + k = ki2 - 1; + phi2[j][i] = C2 * (u[k][j][i][4] - 0.50 * + (u[k][j][i][1] * u[k][j][i][1] + + u[k][j][i][2] * u[k][j][i][2] + + u[k][j][i][3] * u[k][j][i][3]) / + u[k][j][i][0]); + } + } + frc1 = 0.0; + for(j = jbeg; j < jfin1; j++) + { + for(i = ibeg; i < ifin1; i++) + { + frc1 = frc1 + + (phi1[j][i] + phi1[j][i + 1] + phi1[j + 1][i] + phi1[j + 1][i + 1] + + phi2[j][i] + phi2[j][i + 1] + phi2[j + 1][i] + phi2[j + 1][i + 1]); + } + } + frc1 = dxi * deta * frc1; + /* + * --------------------------------------------------------------------- + * initialize + * --------------------------------------------------------------------- + */ + for(i = 0; i <= ISIZ2 + 1; i++) + { + for(k = 0; k <= ISIZ3 + 1; k++) + { + phi1[k][i] 
= 0.0; + phi2[k][i] = 0.0; + } + } + if(jbeg == ji1) + { + for(k = ki1; k < ki2; k++) + { + for(i = ibeg; i < ifin; i++) + { + phi1[k][i] = + C2 * (u[k][jbeg][i][4] - 0.50 * + (u[k][jbeg][i][1] * u[k][jbeg][i][1] + + u[k][jbeg][i][2] * u[k][jbeg][i][2] + + u[k][jbeg][i][3] * u[k][jbeg][i][3]) / + u[k][jbeg][i][0]); + } + } + } + if(jfin == ji2) + { + for(k = ki1; k < ki2; k++) + { + for(i = ibeg; i < ifin; i++) + { + phi2[k][i] = C2 * (u[k][jfin - 1][i][4] - + 0.50 * + (u[k][jfin - 1][i][1] * u[k][jfin - 1][i][1] + + u[k][jfin - 1][i][2] * u[k][jfin - 1][i][2] + + u[k][jfin - 1][i][3] * u[k][jfin - 1][i][3]) / + u[k][jfin - 1][i][0]); + } + } + } + frc2 = 0.0; + for(k = ki1; k < ki2 - 1; k++) + { + for(i = ibeg; i < ifin1; i++) + { + frc2 = frc2 + + (phi1[k][i] + phi1[k][i + 1] + phi1[k + 1][i] + phi1[k + 1][i + 1] + + phi2[k][i] + phi2[k][i + 1] + phi2[k + 1][i] + phi2[k + 1][i + 1]); + } + } + frc2 = dxi * dzeta * frc2; + /* + * --------------------------------------------------------------------- + * initialize + * --------------------------------------------------------------------- + */ + for(i = 0; i <= ISIZ2 + 1; i++) + { + for(k = 0; k <= ISIZ3 + 1; k++) + { + phi1[k][i] = 0.0; + phi2[k][i] = 0.0; + } + } + if(ibeg == ii1) + { + for(k = ki1; k < ki2; k++) + { + for(j = jbeg; j < jfin; j++) + { + phi1[k][j] = + C2 * (u[k][j][ibeg][4] - 0.50 * + (u[k][j][ibeg][1] * u[k][j][ibeg][1] + + u[k][j][ibeg][2] * u[k][j][ibeg][2] + + u[k][j][ibeg][3] * u[k][j][ibeg][3]) / + u[k][j][ibeg][0]); + } + } + } + if(ifin == ii2) + { + for(k = ki1; k < ki2; k++) + { + for(j = jbeg; j < jfin; j++) + { + phi2[k][j] = C2 * (u[k][j][ifin - 1][4] - + 0.50 * + (u[k][j][ifin - 1][1] * u[k][j][ifin - 1][1] + + u[k][j][ifin - 1][2] * u[k][j][ifin - 1][2] + + u[k][j][ifin - 1][3] * u[k][j][ifin - 1][3]) / + u[k][j][ifin - 1][0]); + } + } + } + frc3 = 0.0; + for(k = ki1; k < ki2 - 1; k++) + { + for(j = jbeg; j < jfin1; j++) + { + frc3 = frc3 + + (phi1[k][j] + phi1[k][j + 1] + phi1[k + 
1][j] + phi1[k + 1][j + 1] + + phi2[k][j] + phi2[k][j + 1] + phi2[k + 1][j] + phi2[k + 1][j + 1]); + } + } + frc3 = deta * dzeta * frc3; + frc = 0.25 * (frc1 + frc2 + frc3); +} + +void +read_input() +{ + /* + * --------------------------------------------------------------------- + * if input file does not exist, it uses defaults + * ipr = 1 for detailed progress output + * inorm = how often the norm is printed (once every inorm iterations) + * itmax = number of pseudo time steps + * dt = time step + * omega 1 over-relaxation factor for SSOR + * tolrsd = steady state residual tolerance levels + * nx, ny, nz = number of grid points in x, y, z directions + * --------------------------------------------------------------------- + */ + FILE* fp; + int avoid_warning = 0; + if((fp = fopen("inputlu.data", "r")) != nullptr) + { + printf("Reading from input file inputlu.data\n"); + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + avoid_warning = fscanf(fp, "%d%d", &ipr, &inorm); + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + avoid_warning = fscanf(fp, "%d", &itmax); + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + avoid_warning = fscanf(fp, "%lf", &dt); + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + avoid_warning = fscanf(fp, "%lf", &omega); + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + avoid_warning = fscanf(fp, "%lf%lf%lf%lf%lf", &tolrsd[0], &tolrsd[1], &tolrsd[2], + &tolrsd[3], &tolrsd[4]); + while(fgetc(fp) != '\n') + ; + while(fgetc(fp) != '\n') + ; + avoid_warning = fscanf(fp, "%d%d%d", &nx0, &ny0, &nz0); + fclose(fp); + } + else + { + ipr = IPR_DEFAULT; + inorm = INORM_DEFAULT; + itmax = ITMAX_DEFAULT; + dt = DT_DEFAULT; + omega = OMEGA_DEFAULT; + tolrsd[0] = TOLRSD1_DEF; + tolrsd[1] = TOLRSD2_DEF; + tolrsd[2] = TOLRSD3_DEF; + tolrsd[3] = TOLRSD4_DEF; + tolrsd[4] = 
TOLRSD5_DEF; + nx0 = ISIZ1; + ny0 = ISIZ2; + nz0 = ISIZ3; + } + (void) avoid_warning; + /* + * --------------------------------------------------------------------- + * check problem size + * --------------------------------------------------------------------- + */ + if((nx0 < 4) || (ny0 < 4) || (nz0 < 4)) + { + printf(" PROBLEM SIZE IS TOO SMALL - \n" + " SET EACH OF NX, NY AND NZ AT LEAST EQUAL TO 5\n"); + exit(EXIT_FAILURE); + } + if((nx0 > ISIZ1) || (ny0 > ISIZ2) || (nz0 > ISIZ3)) + { + printf(" PROBLEM SIZE IS TOO LARGE - \n" + " NX, NY AND NZ SHOULD BE EQUAL TO \n" + " ISIZ1, ISIZ2 AND ISIZ3 RESPECTIVELY\n"); + exit(EXIT_FAILURE); + } + printf("\n\n NAS Parallel Benchmarks 4.1 Parallel C++ version with OpenMP - LU " + "Benchmark\n\n"); + printf(" Size: %4dx%4dx%4d\n", nx0, ny0, nz0); + printf(" Iterations: %4d\n", itmax); + printf("\n"); +} + +/* + * --------------------------------------------------------------------- + * compute the right hand sides + * --------------------------------------------------------------------- + */ +void +rhs() +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, k, m; + double q; + double tmp, utmp[ISIZ3][6], rtmp[ISIZ3][5]; + double u21, u31, u41; + double u21i, u31i, u41i, u51i; + double u21j, u31j, u41j, u51j; + double u21k, u31k, u41k, u51k; + double u21im1, u31im1, u41im1, u51im1; + double u21jm1, u31jm1, u41jm1, u51jm1; + double u21km1, u31km1, u41km1, u51km1; + double flux[ISIZ1][5]; + + if(timeron) + { + timer_start(T_RHS); + } +#pragma omp for + for(k = 0; k < nz; k++) + { + for(j = 0; j < ny; j++) + { + for(i = 0; i < nx; i++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = -frct[k][j][i][m]; + } + tmp = 1.0 / u[k][j][i][0]; + rho_i[k][j][i] = tmp; + qs[k][j][i] = + 0.50 * + (u[k][j][i][1] * u[k][j][i][1] + u[k][j][i][2] * u[k][j][i][2] + + u[k][j][i][3] * u[k][j][i][3]) * + 
tmp; + } + } + } + if(timeron) + { + timer_start(T_RHSX); + } +/* + * --------------------------------------------------------------------- + * xi-direction flux differences + * --------------------------------------------------------------------- + */ +#pragma omp for + for(k = 1; k < nz - 1; k++) + { + for(j = jst; j < jend; j++) + { + for(i = 0; i < nx; i++) + { + flux[i][0] = u[k][j][i][1]; + u21 = u[k][j][i][1] * rho_i[k][j][i]; + q = qs[k][j][i]; + flux[i][1] = u[k][j][i][1] * u21 + C2 * (u[k][j][i][4] - q); + flux[i][2] = u[k][j][i][2] * u21; + flux[i][3] = u[k][j][i][3] * u21; + flux[i][4] = (C1 * u[k][j][i][4] - C2 * q) * u21; + } + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = + rsd[k][j][i][m] - tx2 * (flux[i + 1][m] - flux[i - 1][m]); + } + } + for(i = ist; i < nx; i++) + { + tmp = rho_i[k][j][i]; + u21i = tmp * u[k][j][i][1]; + u31i = tmp * u[k][j][i][2]; + u41i = tmp * u[k][j][i][3]; + u51i = tmp * u[k][j][i][4]; + tmp = rho_i[k][j][i - 1]; + u21im1 = tmp * u[k][j][i - 1][1]; + u31im1 = tmp * u[k][j][i - 1][2]; + u41im1 = tmp * u[k][j][i - 1][3]; + u51im1 = tmp * u[k][j][i - 1][4]; + flux[i][1] = (4.0 / 3.0) * tx3 * (u21i - u21im1); + flux[i][2] = tx3 * (u31i - u31im1); + flux[i][3] = tx3 * (u41i - u41im1); + flux[i][4] = 0.50 * (1.0 - C1 * C5) * tx3 * + ((u21i * u21i + u31i * u31i + u41i * u41i) - + (u21im1 * u21im1 + u31im1 * u31im1 + u41im1 * u41im1)) + + (1.0 / 6.0) * tx3 * (u21i * u21i - u21im1 * u21im1) + + C1 * C5 * tx3 * (u51i - u51im1); + } + for(i = ist; i < iend; i++) + { + rsd[k][j][i][0] = + rsd[k][j][i][0] + + dx1 * tx1 * + (u[k][j][i - 1][0] - 2.0 * u[k][j][i][0] + u[k][j][i + 1][0]); + rsd[k][j][i][1] = + rsd[k][j][i][1] + tx3 * C3 * C4 * (flux[i + 1][1] - flux[i][1]) + + dx2 * tx1 * + (u[k][j][i - 1][1] - 2.0 * u[k][j][i][1] + u[k][j][i + 1][1]); + rsd[k][j][i][2] = + rsd[k][j][i][2] + tx3 * C3 * C4 * (flux[i + 1][2] - flux[i][2]) + + dx3 * tx1 * + (u[k][j][i - 1][2] - 2.0 * u[k][j][i][2] + u[k][j][i 
+ 1][2]); + rsd[k][j][i][3] = + rsd[k][j][i][3] + tx3 * C3 * C4 * (flux[i + 1][3] - flux[i][3]) + + dx4 * tx1 * + (u[k][j][i - 1][3] - 2.0 * u[k][j][i][3] + u[k][j][i + 1][3]); + rsd[k][j][i][4] = + rsd[k][j][i][4] + tx3 * C3 * C4 * (flux[i + 1][4] - flux[i][4]) + + dx5 * tx1 * + (u[k][j][i - 1][4] - 2.0 * u[k][j][i][4] + u[k][j][i + 1][4]); + } + /* + * --------------------------------------------------------------------- + * fourth-order dissipation + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + rsd[k][j][1][m] = + rsd[k][j][1][m] - + dssp * (+5.0 * u[k][j][1][m] - 4.0 * u[k][j][2][m] + u[k][j][3][m]); + rsd[k][j][2][m] = + rsd[k][j][2][m] - dssp * (-4.0 * u[k][j][1][m] + 6.0 * u[k][j][2][m] - + 4.0 * u[k][j][3][m] + u[k][j][4][m]); + } + for(i = 3; i < nx - 3; i++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = + rsd[k][j][i][m] - + dssp * (u[k][j][i - 2][m] - 4.0 * u[k][j][i - 1][m] + + 6.0 * u[k][j][i][m] - 4.0 * u[k][j][i + 1][m] + + u[k][j][i + 2][m]); + } + } + for(m = 0; m < 5; m++) + { + rsd[k][j][nx - 3][m] = + rsd[k][j][nx - 3][m] - + dssp * (u[k][j][nx - 5][m] - 4.0 * u[k][j][nx - 4][m] + + 6.0 * u[k][j][nx - 3][m] - 4.0 * u[k][j][nx - 2][m]); + rsd[k][j][nx - 2][m] = + rsd[k][j][nx - 2][m] - + dssp * (u[k][j][nx - 4][m] - 4.0 * u[k][j][nx - 3][m] + + 5.0 * u[k][j][nx - 2][m]); + } + } + } + if(timeron) + { + timer_stop(T_RHSX); + } + if(timeron) + { + timer_start(T_RHSY); + } +/* + * --------------------------------------------------------------------- + * eta-direction flux differences + * --------------------------------------------------------------------- + */ +#pragma omp for + for(k = 1; k < nz - 1; k++) + { + for(i = ist; i < iend; i++) + { + for(j = 0; j < ny; j++) + { + flux[j][0] = u[k][j][i][2]; + u31 = u[k][j][i][2] * rho_i[k][j][i]; + q = qs[k][j][i]; + flux[j][1] = u[k][j][i][1] * u31; + flux[j][2] = u[k][j][i][2] * u31 + C2 * (u[k][j][i][4] - q); + flux[j][3] = 
u[k][j][i][3] * u31; + flux[j][4] = (C1 * u[k][j][i][4] - C2 * q) * u31; + } + for(j = jst; j < jend; j++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = + rsd[k][j][i][m] - ty2 * (flux[j + 1][m] - flux[j - 1][m]); + } + } + for(j = jst; j < ny; j++) + { + tmp = rho_i[k][j][i]; + u21j = tmp * u[k][j][i][1]; + u31j = tmp * u[k][j][i][2]; + u41j = tmp * u[k][j][i][3]; + u51j = tmp * u[k][j][i][4]; + tmp = rho_i[k][j - 1][i]; + u21jm1 = tmp * u[k][j - 1][i][1]; + u31jm1 = tmp * u[k][j - 1][i][2]; + u41jm1 = tmp * u[k][j - 1][i][3]; + u51jm1 = tmp * u[k][j - 1][i][4]; + flux[j][1] = ty3 * (u21j - u21jm1); + flux[j][2] = (4.0 / 3.0) * ty3 * (u31j - u31jm1); + flux[j][3] = ty3 * (u41j - u41jm1); + flux[j][4] = 0.50 * (1.0 - C1 * C5) * ty3 * + ((u21j * u21j + u31j * u31j + u41j * u41j) - + (u21jm1 * u21jm1 + u31jm1 * u31jm1 + u41jm1 * u41jm1)) + + (1.0 / 6.0) * ty3 * (u31j * u31j - u31jm1 * u31jm1) + + C1 * C5 * ty3 * (u51j - u51jm1); + } + for(j = jst; j < jend; j++) + { + rsd[k][j][i][0] = + rsd[k][j][i][0] + + dy1 * ty1 * + (u[k][j - 1][i][0] - 2.0 * u[k][j][i][0] + u[k][j + 1][i][0]); + rsd[k][j][i][1] = + rsd[k][j][i][1] + ty3 * C3 * C4 * (flux[j + 1][1] - flux[j][1]) + + dy2 * ty1 * + (u[k][j - 1][i][1] - 2.0 * u[k][j][i][1] + u[k][j + 1][i][1]); + rsd[k][j][i][2] = + rsd[k][j][i][2] + ty3 * C3 * C4 * (flux[j + 1][2] - flux[j][2]) + + dy3 * ty1 * + (u[k][j - 1][i][2] - 2.0 * u[k][j][i][2] + u[k][j + 1][i][2]); + rsd[k][j][i][3] = + rsd[k][j][i][3] + ty3 * C3 * C4 * (flux[j + 1][3] - flux[j][3]) + + dy4 * ty1 * + (u[k][j - 1][i][3] - 2.0 * u[k][j][i][3] + u[k][j + 1][i][3]); + rsd[k][j][i][4] = + rsd[k][j][i][4] + ty3 * C3 * C4 * (flux[j + 1][4] - flux[j][4]) + + dy5 * ty1 * + (u[k][j - 1][i][4] - 2.0 * u[k][j][i][4] + u[k][j + 1][i][4]); + } + } + /* + * --------------------------------------------------------------------- + * fourth-order dissipation + * --------------------------------------------------------------------- + */ + for(i = ist; i < iend; i++) + 
{ + for(m = 0; m < 5; m++) + { + rsd[k][1][i][m] = + rsd[k][1][i][m] - + dssp * (+5.0 * u[k][1][i][m] - 4.0 * u[k][2][i][m] + u[k][3][i][m]); + rsd[k][2][i][m] = + rsd[k][2][i][m] - dssp * (-4.0 * u[k][1][i][m] + 6.0 * u[k][2][i][m] - + 4.0 * u[k][3][i][m] + u[k][4][i][m]); + } + } + for(j = 3; j < ny - 3; j++) + { + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = + rsd[k][j][i][m] - + dssp * (u[k][j - 2][i][m] - 4.0 * u[k][j - 1][i][m] + + 6.0 * u[k][j][i][m] - 4.0 * u[k][j + 1][i][m] + + u[k][j + 2][i][m]); + } + } + } + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + rsd[k][ny - 3][i][m] = + rsd[k][ny - 3][i][m] - + dssp * (u[k][ny - 5][i][m] - 4.0 * u[k][ny - 4][i][m] + + 6.0 * u[k][ny - 3][i][m] - 4.0 * u[k][ny - 2][i][m]); + rsd[k][ny - 2][i][m] = + rsd[k][ny - 2][i][m] - + dssp * (u[k][ny - 4][i][m] - 4.0 * u[k][ny - 3][i][m] + + 5.0 * u[k][ny - 2][i][m]); + } + } + } + if(timeron) + { + timer_stop(T_RHSY); + } + if(timeron) + { + timer_start(T_RHSZ); + } +/* + * --------------------------------------------------------------------- + * zeta-direction flux differences + * --------------------------------------------------------------------- + */ +#pragma omp for + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + for(k = 0; k < nz; k++) + { + utmp[k][0] = u[k][j][i][0]; + utmp[k][1] = u[k][j][i][1]; + utmp[k][2] = u[k][j][i][2]; + utmp[k][3] = u[k][j][i][3]; + utmp[k][4] = u[k][j][i][4]; + utmp[k][5] = rho_i[k][j][i]; + } + for(k = 0; k < nz; k++) + { + flux[k][0] = utmp[k][3]; + u41 = utmp[k][3] * utmp[k][5]; + q = qs[k][j][i]; + flux[k][1] = utmp[k][1] * u41; + flux[k][2] = utmp[k][2] * u41; + flux[k][3] = utmp[k][3] * u41 + C2 * (utmp[k][4] - q); + flux[k][4] = (C1 * utmp[k][4] - C2 * q) * u41; + } + for(k = 1; k < nz - 1; k++) + { + for(m = 0; m < 5; m++) + { + rtmp[k][m] = + rsd[k][j][i][m] - tz2 * (flux[k + 1][m] - flux[k - 1][m]); + } + } + for(k = 1; k < nz; k++) + { + tmp = utmp[k][5]; 
+ u21k = tmp * utmp[k][1]; + u31k = tmp * utmp[k][2]; + u41k = tmp * utmp[k][3]; + u51k = tmp * utmp[k][4]; + tmp = utmp[k - 1][5]; + u21km1 = tmp * utmp[k - 1][1]; + u31km1 = tmp * utmp[k - 1][2]; + u41km1 = tmp * utmp[k - 1][3]; + u51km1 = tmp * utmp[k - 1][4]; + flux[k][1] = tz3 * (u21k - u21km1); + flux[k][2] = tz3 * (u31k - u31km1); + flux[k][3] = (4.0 / 3.0) * tz3 * (u41k - u41km1); + flux[k][4] = 0.50 * (1.0 - C1 * C5) * tz3 * + ((u21k * u21k + u31k * u31k + u41k * u41k) - + (u21km1 * u21km1 + u31km1 * u31km1 + u41km1 * u41km1)) + + (1.0 / 6.0) * tz3 * (u41k * u41k - u41km1 * u41km1) + + C1 * C5 * tz3 * (u51k - u51km1); + } + for(k = 1; k < nz - 1; k++) + { + rtmp[k][0] = + rtmp[k][0] + + dz1 * tz1 * (utmp[k - 1][0] - 2.0 * utmp[k][0] + utmp[k + 1][0]); + rtmp[k][1] = + rtmp[k][1] + tz3 * C3 * C4 * (flux[k + 1][1] - flux[k][1]) + + dz2 * tz1 * (utmp[k - 1][1] - 2.0 * utmp[k][1] + utmp[k + 1][1]); + rtmp[k][2] = + rtmp[k][2] + tz3 * C3 * C4 * (flux[k + 1][2] - flux[k][2]) + + dz3 * tz1 * (utmp[k - 1][2] - 2.0 * utmp[k][2] + utmp[k + 1][2]); + rtmp[k][3] = + rtmp[k][3] + tz3 * C3 * C4 * (flux[k + 1][3] - flux[k][3]) + + dz4 * tz1 * (utmp[k - 1][3] - 2.0 * utmp[k][3] + utmp[k + 1][3]); + rtmp[k][4] = + rtmp[k][4] + tz3 * C3 * C4 * (flux[k + 1][4] - flux[k][4]) + + dz5 * tz1 * (utmp[k - 1][4] - 2.0 * utmp[k][4] + utmp[k + 1][4]); + } + /* + * --------------------------------------------------------------------- + * fourth-order dissipation + * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + rsd[1][j][i][m] = rtmp[1][m] - dssp * (+5.0 * utmp[1][m] - + 4.0 * utmp[2][m] + utmp[3][m]); + rsd[2][j][i][m] = + rtmp[2][m] - dssp * (-4.0 * utmp[1][m] + 6.0 * utmp[2][m] - + 4.0 * utmp[3][m] + utmp[4][m]); + } + for(k = 3; k < nz - 3; k++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = + rtmp[k][m] - + dssp * (utmp[k - 2][m] - 4.0 * utmp[k - 1][m] + 6.0 * utmp[k][m] - + 4.0 * utmp[k + 1][m] + utmp[k + 2][m]); 
+ } + } + for(m = 0; m < 5; m++) + { + rsd[nz - 3][j][i][m] = + rtmp[nz - 3][m] - + dssp * (utmp[nz - 5][m] - 4.0 * utmp[nz - 4][m] + + 6.0 * utmp[nz - 3][m] - 4.0 * utmp[nz - 2][m]); + rsd[nz - 2][j][i][m] = + rtmp[nz - 2][m] - dssp * (utmp[nz - 4][m] - 4.0 * utmp[nz - 3][m] + + 5.0 * utmp[nz - 2][m]); + } + } + } + if(timeron) + { + timer_stop(T_RHSZ); + } + if(timeron) + { + timer_stop(T_RHS); + } +} + +/* + * --------------------------------------------------------------------- + * set the boundary values of dependent variables + * --------------------------------------------------------------------- + */ +void +setbv() +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, k, m; + double temp1[5], temp2[5]; +/* + * --------------------------------------------------------------------- + * set the dependent variable values along the top and bottom faces + * --------------------------------------------------------------------- + */ +#pragma omp for + for(j = 0; j < ny; j++) + { + for(i = 0; i < nx; i++) + { + exact(i, j, 0, temp1); + exact(i, j, nz - 1, temp2); + for(m = 0; m < 5; m++) + { + u[0][j][i][m] = temp1[m]; + u[nz - 1][j][i][m] = temp2[m]; + } + } + } +/* + * --------------------------------------------------------------------- + * set the dependent variable values along north and south faces + * --------------------------------------------------------------------- + */ +#pragma omp for + for(k = 0; k < nz; k++) + { + for(i = 0; i < nx; i++) + { + exact(i, 0, k, temp1); + exact(i, ny - 1, k, temp2); + for(m = 0; m < 5; m++) + { + u[k][0][i][m] = temp1[m]; + u[k][ny - 1][i][m] = temp2[m]; + } + } + } +/* + * --------------------------------------------------------------------- + * set the dependent variable values along east and west faces + * --------------------------------------------------------------------- + */ 
+#pragma omp for + for(k = 0; k < nz; k++) + { + for(j = 0; j < ny; j++) + { + exact(0, j, k, temp1); + exact(nx - 1, j, k, temp2); + for(m = 0; m < 5; m++) + { + u[k][j][0][m] = temp1[m]; + u[k][j][nx - 1][m] = temp2[m]; + } + } + } +} + +void +setcoeff() +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + * set up coefficients + * --------------------------------------------------------------------- + */ + dxi = 1.0 / (nx0 - 1); + deta = 1.0 / (ny0 - 1); + dzeta = 1.0 / (nz0 - 1); + tx1 = 1.0 / (dxi * dxi); + tx2 = 1.0 / (2.0 * dxi); + tx3 = 1.0 / dxi; + ty1 = 1.0 / (deta * deta); + ty2 = 1.0 / (2.0 * deta); + ty3 = 1.0 / deta; + tz1 = 1.0 / (dzeta * dzeta); + tz2 = 1.0 / (2.0 * dzeta); + tz3 = 1.0 / dzeta; + /* + * --------------------------------------------------------------------- + * diffusion coefficients + * --------------------------------------------------------------------- + */ + dx1 = 0.75; + dx2 = dx1; + dx3 = dx1; + dx4 = dx1; + dx5 = dx1; + dy1 = 0.75; + dy2 = dy1; + dy3 = dy1; + dy4 = dy1; + dy5 = dy1; + dz1 = 1.00; + dz2 = dz1; + dz3 = dz1; + dz4 = dz1; + dz5 = dz1; + /* + * --------------------------------------------------------------------- + * fourth difference dissipation + * --------------------------------------------------------------------- + */ + dssp = (max(max(dx1, dy1), dz1)) / 4.0; + /* + * --------------------------------------------------------------------- + * coefficients of the exact solution to the first pde + * --------------------------------------------------------------------- + */ + ce[0][0] = 2.0; + ce[1][0] = 0.0; + ce[2][0] = 0.0; + ce[3][0] = 4.0; + ce[4][0] = 5.0; + ce[5][0] = 3.0; + ce[6][0] = 5.0e-01; + ce[7][0] = 2.0e-02; + ce[8][0] = 1.0e-02; + ce[9][0] = 3.0e-02; + ce[10][0] = 5.0e-01; + ce[11][0] = 4.0e-01; + ce[12][0] = 3.0e-01; + /* + * 
--------------------------------------------------------------------- + * coefficients of the exact solution to the second pde + * --------------------------------------------------------------------- + */ + ce[0][1] = 1.0; + ce[1][1] = 0.0; + ce[2][1] = 0.0; + ce[3][1] = 0.0; + ce[4][1] = 1.0; + ce[5][1] = 2.0; + ce[6][1] = 3.0; + ce[7][1] = 1.0e-02; + ce[8][1] = 3.0e-02; + ce[9][1] = 2.0e-02; + ce[10][1] = 4.0e-01; + ce[11][1] = 3.0e-01; + ce[12][1] = 5.0e-01; + /* + * --------------------------------------------------------------------- + * coefficients of the exact solution to the third pde + * --------------------------------------------------------------------- + */ + ce[0][2] = 2.0; + ce[1][2] = 2.0; + ce[2][2] = 0.0; + ce[3][2] = 0.0; + ce[4][2] = 0.0; + ce[5][2] = 2.0; + ce[6][2] = 3.0; + ce[7][2] = 4.0e-02; + ce[8][2] = 3.0e-02; + ce[9][2] = 5.0e-02; + ce[10][2] = 3.0e-01; + ce[11][2] = 5.0e-01; + ce[12][2] = 4.0e-01; + /* + * --------------------------------------------------------------------- + * coefficients of the exact solution to the fourth pde + * --------------------------------------------------------------------- + */ + ce[0][3] = 2.0; + ce[1][3] = 2.0; + ce[2][3] = 0.0; + ce[3][3] = 0.0; + ce[4][3] = 0.0; + ce[5][3] = 2.0; + ce[6][3] = 3.0; + ce[7][3] = 3.0e-02; + ce[8][3] = 5.0e-02; + ce[9][3] = 4.0e-02; + ce[10][3] = 2.0e-01; + ce[11][3] = 1.0e-01; + ce[12][3] = 3.0e-01; + /* + * --------------------------------------------------------------------- + * coefficients of the exact solution to the fifth pde + * --------------------------------------------------------------------- + */ + ce[0][4] = 5.0; + ce[1][4] = 4.0; + ce[2][4] = 3.0; + ce[3][4] = 2.0; + ce[4][4] = 1.0e-01; + ce[5][4] = 4.0e-01; + ce[6][4] = 3.0e-01; + ce[7][4] = 5.0e-02; + ce[8][4] = 4.0e-02; + ce[9][4] = 3.0e-02; + ce[10][4] = 1.0e-01; + ce[11][4] = 3.0e-01; + ce[12][4] = 2.0e-01; +} + +/* + * --------------------------------------------------------------------- + * set the 
initial values of independent variables based on tri-linear
 * interpolation of boundary values in the computational space.
 * ---------------------------------------------------------------------
 */
void
setiv()
{
    /* values returned by exact() on the six faces, projected from (i, j, k) */
    double at_i_first[5], at_i_last[5];
    double at_j_first[5], at_j_last[5];
    double at_k_first[5], at_k_last[5];

    /* only interior points are written; the faces are left untouched here */
#pragma omp for
    for(int k = 1; k < nz - 1; k++)
    {
        const double zeta = ((double) k) / (nz - 1);
        for(int j = 1; j < ny - 1; j++)
        {
            const double eta = ((double) j) / (ny0 - 1);
            for(int i = 1; i < nx - 1; i++)
            {
                const double xi = ((double) i) / (nx0 - 1);

                exact(0, j, k, at_i_first);
                exact(nx0 - 1, j, k, at_i_last);
                exact(i, 0, k, at_j_first);
                exact(i, ny0 - 1, k, at_j_last);
                exact(i, j, 0, at_k_first);
                exact(i, j, nz - 1, at_k_last);

                for(int comp = 0; comp < 5; comp++)
                {
                    /* one-dimensional linear interpolants along each axis */
                    const double along_i =
                        (1.0 - xi) * at_i_first[comp] + xi * at_i_last[comp];
                    const double along_j =
                        (1.0 - eta) * at_j_first[comp] + eta * at_j_last[comp];
                    const double along_k =
                        (1.0 - zeta) * at_k_first[comp] + zeta * at_k_last[comp];

                    /* combine: singles minus pairwise products plus the triple */
                    u[k][j][i][comp] = along_i + along_j + along_k - along_i * along_j -
                                       along_j * along_k - along_k * along_i +
                                       along_i * along_j * along_k;
                }
            }
        }
    }
}

/*
 * ---------------------------------------------------------------------
 * to perform pseudo-time stepping SSOR iterations
 * for five nonlinear pde's.
+ * --------------------------------------------------------------------- + */ +void +ssor(int niter) +{ + /* + * --------------------------------------------------------------------- + * local variables + * --------------------------------------------------------------------- + */ + int i, j, k, m, n; + int istep; + double tmp, tv[ISIZ2 * (ISIZ1 / 2 * 2 + 1) * 5]; + double delunm[5]; + /* + * --------------------------------------------------------------------- + * begin pseudo-time stepping iterations + * --------------------------------------------------------------------- + */ + tmp = 1.0 / (omega * (2.0 - omega)); + +/* + * --------------------------------------------------------------------- + * initialize a,b,c,d to zero (guarantees that page tables have been + * formed, if applicable on given architecture, before timestepping). + * --------------------------------------------------------------------- + */ +#pragma omp parallel for private(i, j, n, m) + for(j = 0; j < ISIZ2; j++) + { + for(i = 0; i < ISIZ1; i++) + { + for(n = 0; n < 5; n++) + { + for(m = 0; m < 5; m++) + { + a[j][i][n][m] = 0.0; + b[j][i][n][m] = 0.0; + c[j][i][n][m] = 0.0; + d[j][i][n][m] = 0.0; + } + } + } + } + for(i = 1; i <= T_LAST; i++) + { + timer_clear(i); + } + +#pragma omp parallel + { + /* + * --------------------------------------------------------------------- + * compute the steady-state residuals + * --------------------------------------------------------------------- + */ + rhs(); + + /* + * --------------------------------------------------------------------- + * compute the L2 norms of newton iteration residuals + * --------------------------------------------------------------------- + */ + l2norm(nx0, ny0, nz0, ist, iend, jst, jend, rsd, rsdnm); + } /* end parallel */ + + for(i = 1; i <= T_LAST; i++) + { + timer_clear(i); + } + timer_start(1); + +#pragma omp parallel private(istep, i, j, k, m) + { + /* + * 
--------------------------------------------------------------------- + * the timestep loop + * --------------------------------------------------------------------- + */ + for(istep = 1; istep <= niter; istep++) + { + if((istep % 20) == 0 || istep == itmax || istep == 1) + { +#pragma omp master + if(niter > 1) + { + printf(" Time step %4d\n", istep); + } + } + /* + * --------------------------------------------------------------------- + * perform SSOR iteration + * --------------------------------------------------------------------- + */ + if(timeron) + { +#pragma omp master + timer_start(T_RHS); + } +#pragma omp for + for(k = 1; k < nz - 1; k++) + { + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + rsd[k][j][i][m] = dt * rsd[k][j][i][m]; + } + } + } + } + if(timeron) + { +#pragma omp master + timer_stop(T_RHS); + } + + for(k = 1; k < nz - 1; k++) + { + /* + * --------------------------------------------------------------------- + * form the lower triangular part of the jacobian matrix + * --------------------------------------------------------------------- + */ + if(timeron) + { +#pragma omp master + timer_start(T_JACLD); + } + jacld(k); + if(timeron) + { +#pragma omp master + timer_stop(T_JACLD); + } + + /* + * --------------------------------------------------------------------- + * perform the lower triangular solution + * --------------------------------------------------------------------- + */ + if(timeron) + { +#pragma omp master + timer_start(T_BLTS); + } + + blts(nx, ny, nz, k, omega, rsd, a, b, c, d, ist, iend, jst, jend, nx0, + ny0); + + if(timeron) + { +#pragma omp master + timer_stop(T_BLTS); + } + } + +#pragma omp barrier + + for(k = nz - 2; k > 0; k--) + { + /* + * --------------------------------------------------------------------- + * form the strictly upper triangular part of the jacobian matrix + * --------------------------------------------------------------------- + */ + if(timeron) + { 
+#pragma omp master + timer_start(T_JACU); + } + jacu(k); + if(timeron) + { +#pragma omp master + timer_stop(T_JACU); + } + /* + * --------------------------------------------------------------------- + * perform the upper triangular solution + * --------------------------------------------------------------------- + */ + if(timeron) + { +#pragma omp master + timer_start(T_BUTS); + } + + buts(nx, ny, nz, k, omega, rsd, tv, d, a, b, c, ist, iend, jst, jend, nx0, + ny0); + + if(timeron) + { +#pragma omp master + timer_stop(T_BUTS); + } + } + +#pragma omp barrier + + /* + * --------------------------------------------------------------------- + * update the variables + * --------------------------------------------------------------------- + */ + if(timeron) + { +#pragma omp master + timer_start(T_ADD); + } + +#pragma omp for + for(k = 1; k < nz - 1; k++) + { + for(j = jst; j < jend; j++) + { + for(i = ist; i < iend; i++) + { + for(m = 0; m < 5; m++) + { + u[k][j][i][m] = u[k][j][i][m] + tmp * rsd[k][j][i][m]; + } + } + } + } + if(timeron) + { +#pragma omp master + timer_stop(T_ADD); + } + /* + * --------------------------------------------------------------------- + * compute the max-norms of newton iteration corrections + * --------------------------------------------------------------------- + */ + + if((istep % inorm) == 0) + { + if(timeron) + { +#pragma omp master + timer_start(T_L2NORM); + } + l2norm(nx0, ny0, nz0, ist, iend, jst, jend, rsd, delunm); + if(timeron) + { +#pragma omp master + timer_stop(T_L2NORM); + } + } + /* + * --------------------------------------------------------------------- + * compute the steady-state residuals + * --------------------------------------------------------------------- + */ + rhs(); + + /* + * --------------------------------------------------------------------- + * compute the max-norms of newton iteration residuals + * --------------------------------------------------------------------- + */ + if(((istep % inorm) == 0) 
|| (istep == itmax)) + { + if(timeron) + { +#pragma omp master + timer_start(T_L2NORM); + } + l2norm(nx0, ny0, nz0, ist, iend, jst, jend, rsd, rsdnm); + if(timeron) + { +#pragma omp master + timer_stop(T_L2NORM); + } + } + /* + * --------------------------------------------------------------------- + * check the newton-iteration residuals against the tolerance levels + * --------------------------------------------------------------------- + */ + if((rsdnm[0] < tolrsd[0]) && (rsdnm[1] < tolrsd[1]) && + (rsdnm[2] < tolrsd[2]) && (rsdnm[3] < tolrsd[3]) && (rsdnm[4] < tolrsd[4])) + { +#pragma omp master + printf(" \n convergence was achieved after %4d pseudo-time steps\n", + istep); + break; + } + } + } /* end parallel */ + + timer_stop(1); + maxtime = timer_read(1); +} + +/* + * --------------------------------------------------------------------- + * verification routine + * --------------------------------------------------------------------- + */ +void +verify(double xcr[], double xce[], double xci, char* class_npb, boolean* verified) +{ + double xcrref[5], xceref[5], xciref; + double xcrdif[5], xcedif[5], xcidif; + double epsilon, dtref = 0.0; + int m; + /* + * --------------------------------------------------------------------- + * tolerance level + * --------------------------------------------------------------------- + */ + epsilon = 1.0e-08; + *class_npb = 'U'; + *verified = TRUE; + for(m = 0; m < 5; m++) + { + xcrref[m] = 1.0; + xceref[m] = 1.0; + } + xciref = 1.0; + if((nx0 == 12) && (ny0 == 12) && (nz0 == 12) && (itmax == 50)) + { + *class_npb = 'S'; + dtref = 5.0e-1; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (12X12X12) grid, + * after 50 time steps, with DT = 5.0d-01 + * --------------------------------------------------------------------- + */ + xcrref[0] = 1.6196343210976702e-02; + xcrref[1] = 2.1976745164821318e-03; + xcrref[2] = 1.5179927653399185e-03; + 
xcrref[3] = 1.5029584435994323e-03; + xcrref[4] = 3.4264073155896461e-02; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (12X12X12) grid, + * after 50 time steps, with DT = 5.0d-01 + * --------------------------------------------------------------------- + */ + xceref[0] = 6.4223319957960924e-04; + xceref[1] = 8.4144342047347926e-05; + xceref[2] = 5.8588269616485186e-05; + xceref[3] = 5.8474222595157350e-05; + xceref[4] = 1.3103347914111294e-03; + /* + * --------------------------------------------------------------------- + * reference value of surface integral, for the (12X12X12) grid, + * after 50 time steps, with DT = 5.0d-01 + * --------------------------------------------------------------------- + */ + xciref = 7.8418928865937083e+00; + } + else if((nx0 == 33) && (ny0 == 33) && (nz0 == 33) && (itmax == 300)) + { + *class_npb = 'W'; /* SPEC95fp size */ + dtref = 1.5e-3; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (33x33x33) grid, + * after 300 time steps, with DT = 1.5d-3 + * --------------------------------------------------------------------- + */ + xcrref[0] = 0.1236511638192e+02; + xcrref[1] = 0.1317228477799e+01; + xcrref[2] = 0.2550120713095e+01; + xcrref[3] = 0.2326187750252e+01; + xcrref[4] = 0.2826799444189e+02; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (33X33X33) grid, + * --------------------------------------------------------------------- + */ + xceref[0] = 0.4867877144216e+00; + xceref[1] = 0.5064652880982e-01; + xceref[2] = 0.9281818101960e-01; + xceref[3] = 0.8570126542733e-01; + xceref[4] = 0.1084277417792e+01; + /* + * --------------------------------------------------------------------- + * rReference value of surface integral, for the (33X33X33) grid, + * after 
300 time steps, with DT = 1.5d-3 + * --------------------------------------------------------------------- + */ + xciref = 0.1161399311023e+02; + } + else if((nx0 == 64) && (ny0 == 64) && (nz0 == 64) && (itmax == 250)) + { + *class_npb = 'A'; + dtref = 2.0e+0; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (64X64X64) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xcrref[0] = 7.7902107606689367e+02; + xcrref[1] = 6.3402765259692870e+01; + xcrref[2] = 1.9499249727292479e+02; + xcrref[3] = 1.7845301160418537e+02; + xcrref[4] = 1.8384760349464247e+03; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (64X64X64) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xceref[0] = 2.9964085685471943e+01; + xceref[1] = 2.8194576365003349e+00; + xceref[2] = 7.3473412698774742e+00; + xceref[3] = 6.7139225687777051e+00; + xceref[4] = 7.0715315688392578e+01; + /* + * --------------------------------------------------------------------- + * reference value of surface integral, for the (64X64X64) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xciref = 2.6030925604886277e+01; + } + else if((nx0 == 102) && (ny0 == 102) && (nz0 == 102) && (itmax == 250)) + { + *class_npb = 'B'; + dtref = 2.0e+0; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (102X102X102) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xcrref[0] = 3.5532672969982736e+03; + xcrref[1] = 2.6214750795310692e+02; + xcrref[2] = 
8.8333721850952190e+02; + xcrref[3] = 7.7812774739425265e+02; + xcrref[4] = 7.3087969592545314e+03; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (102X102X102) + * grid, after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xceref[0] = 1.1401176380212709e+02; + xceref[1] = 8.1098963655421574e+00; + xceref[2] = 2.8480597317698308e+01; + xceref[3] = 2.5905394567832939e+01; + xceref[4] = 2.6054907504857413e+02; + /* + c--------------------------------------------------------------------- + * reference value of surface integral, for the (102X102X102) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xciref = 4.7887162703308227e+01; + } + else if((nx0 == 162) && (ny0 == 162) && (nz0 == 162) && (itmax == 250)) + { + *class_npb = 'C'; + dtref = 2.0e+0; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (162X162X162) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xcrref[0] = 1.03766980323537846e+04; + xcrref[1] = 8.92212458801008552e+02; + xcrref[2] = 2.56238814582660871e+03; + xcrref[3] = 2.19194343857831427e+03; + xcrref[4] = 1.78078057261061185e+04; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (162X162X162) + * grid, after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xceref[0] = 2.15986399716949279e+02; + xceref[1] = 1.55789559239863600e+01; + xceref[2] = 5.41318863077207766e+01; + xceref[3] = 4.82262643154045421e+01; + xceref[4] = 4.55902910043250358e+02; + /* + * 
--------------------------------------------------------------------- + * reference value of surface integral, for the (162X162X162) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xciref = 6.66404553572181300e+01; + /* + * --------------------------------------------------------------------- + * reference value of surface integral, for the (162X162X162) grid, + * after 250 time steps, with DT = 2.0d+00 + * --------------------------------------------------------------------- + */ + xciref = 6.66404553572181300e+01; + } + else if((nx0 == 408) && (ny0 == 408) && (nz0 == 408) && (itmax == 300)) + { + *class_npb = 'D'; + dtref = 1.0e+0; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (408X408X408) grid, + * after 300 time steps, with DT = 1.0d+00 + * --------------------------------------------------------------------- + */ + xcrref[0] = 0.4868417937025e+05; + xcrref[1] = 0.4696371050071e+04; + xcrref[2] = 0.1218114549776e+05; + xcrref[3] = 0.1033801493461e+05; + xcrref[4] = 0.7142398413817e+05; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (408X408X408) + * grid, after 300 time steps, with DT = 1.0d+00 + * --------------------------------------------------------------------- + */ + xceref[0] = 0.3752393004482e+03; + xceref[1] = 0.3084128893659e+02; + xceref[2] = 0.9434276905469e+02; + xceref[3] = 0.8230686681928e+02; + xceref[4] = 0.7002620636210e+03; + /* + * --------------------------------------------------------------------- + * reference value of surface integral, for the (408X408X408) grid, + * after 300 time steps, with DT = 1.0d+00 + * --------------------------------------------------------------------- + */ + xciref = 0.8334101392503e+02; + } + else if((nx0 == 1020) && (ny0 == 1020) && (nz0 == 1020) 
&& (itmax == 300)) + { + *class_npb = 'E'; + dtref = 0.5e+0; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of residual, for the (1020X1020X1020) grid, + * after 300 time steps, with DT = 0.5d+00 + * --------------------------------------------------------------------- + */ + xcrref[0] = 0.2099641687874e+06; + xcrref[1] = 0.2130403143165e+05; + xcrref[2] = 0.5319228789371e+05; + xcrref[3] = 0.4509761639833e+05; + xcrref[4] = 0.2932360006590e+06; + /* + * --------------------------------------------------------------------- + * reference values of RMS-norms of solution error, for the (1020X1020X1020) + * grid, after 300 time steps, with DT = 0.5d+00 + * --------------------------------------------------------------------- + */ + xceref[0] = 0.4800572578333e+03; + xceref[1] = 0.4221993400184e+02; + xceref[2] = 0.1210851906824e+03; + xceref[3] = 0.1047888986770e+03; + xceref[4] = 0.8363028257389e+03; + /* + * --------------------------------------------------------------------- + * reference value of surface integral, for the (1020X1020X1020) grid, + * after 300 time steps, with DT = 0.5d+00 + * --------------------------------------------------------------------- + */ + xciref = 0.9512163272273e+02; + } + else + { + *verified = FALSE; + } + /* + * --------------------------------------------------------------------- + * verification test for residuals if gridsize is one of + * the defined grid sizes above (class .ne. 'U') + * --------------------------------------------------------------------- + * compute the difference of solution values and the known reference values. 
+ * --------------------------------------------------------------------- + */ + for(m = 0; m < 5; m++) + { + xcrdif[m] = fabs((xcr[m] - xcrref[m]) / xcrref[m]); + xcedif[m] = fabs((xce[m] - xceref[m]) / xceref[m]); + } + xcidif = fabs((xci - xciref) / xciref); + /* + * --------------------------------------------------------------------- + * output the comparison of computed results to known cases. + * --------------------------------------------------------------------- + */ + if(*class_npb != 'U') + { + printf("\n Verification being performed for class_npb %c\n", *class_npb); + printf(" Accuracy setting for epsilon = %20.13E\n", epsilon); + *verified = (fabs(dt - dtref) <= epsilon); + if(!(*verified)) + { + *class_npb = 'U'; + printf(" DT does not match the reference value of %15.8E\n", dtref); + } + } + else + { + printf(" Unknown class_npb\n"); + } + if(*class_npb != 'U') + { + printf(" Comparison of RMS-norms of residual\n"); + } + else + { + printf(" RMS-norms of residual\n"); + } + for(m = 0; m < 5; m++) + { + if(*class_npb == 'U') + { + printf(" %2d %20.13E\n", m + 1, xcr[m]); + } + else if(xcrdif[m] <= epsilon) + { + printf(" %2d %20.13E%20.13E%20.13E\n", m + 1, xcr[m], xcrref[m], + xcrdif[m]); + } + else + { + *verified = FALSE; + printf(" FAILURE: %2d %20.13E%20.13E%20.13E\n", m + 1, xcr[m], xcrref[m], + xcrdif[m]); + } + } + if(*class_npb != 'U') + { + printf(" Comparison of RMS-norms of solution error\n"); + } + else + { + printf(" RMS-norms of solution error\n"); + } + for(m = 0; m < 5; m++) + { + if(*class_npb == 'U') + { + printf(" %2d %20.13E\n", m + 1, xce[m]); + } + else if(xcedif[m] <= epsilon) + { + printf(" %2d %20.13E%20.13E%20.13E\n", m + 1, xce[m], xceref[m], + xcedif[m]); + } + else + { + *verified = FALSE; + printf(" FAILURE: %2d %20.13E%20.13E%20.13E\n", m + 1, xce[m], xceref[m], + xcedif[m]); + } + } + if(*class_npb != 'U') + { + printf(" Comparison of surface integral\n"); + } + else + { + printf(" Surface integral\n"); + } + 
if(*class_npb == 'U') + { + printf(" %20.13E\n", xci); + } + else if(xcidif <= epsilon) + { + printf(" %20.13E%20.13E%20.13E\n", xci, xciref, xcidif); + } + else + { + *verified = FALSE; + printf(" FAILURE: %20.13E%20.13E%20.13E\n", xci, xciref, xcidif); + } + if(*class_npb == 'U') + { + printf(" No reference values provided\n"); + printf("No verification performed\n"); + } + else if(*verified) + { + printf(" Verification Successful\n"); + } + else + { + printf(" Verification failed\n"); + } +} diff --git a/projects/rocprofiler-systems/examples/openmp/LU/npbparams.hpp b/projects/rocprofiler-systems/examples/openmp/LU/npbparams.hpp new file mode 100644 index 0000000000..dbb8c25eac --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/LU/npbparams.hpp @@ -0,0 +1,27 @@ +/* CLASS = W */ +/* + c This file is generated automatically by the setparams utility. + c It sets the number of processors and the class_npb of the NPB + c in this directory. Do not modify it by hand. + */ + +/* full problem size */ +#define ISIZ1 25 +#define ISIZ2 25 +#define ISIZ3 25 +/* number of iterations and how often to print the norm */ +#define ITMAX_DEFAULT 150 +#define INORM_DEFAULT 150 +#define DT_DEFAULT 1.5e-3 +#define CONVERTDOUBLE FALSE +#define COMPILETIME "07 Mar 2022" +#define NPBVERSION "4.1" +#define LIBVERSION "201511" +#define COMPILERVERSION "11.1.0" +#define CS1 "g++ -std=c++14" +#define CS2 "$(CC)" +#define CS3 "-lm" +#define CS4 "-I../common " +#define CS5 "-O3 -fopenmp -mcmodel=medium" +#define CS6 "-O3 -fopenmp -mcmodel=medium" +#define CS7 "randdp" diff --git a/projects/rocprofiler-systems/examples/openmp/common/c_print_results.cpp b/projects/rocprofiler-systems/examples/openmp/common/c_print_results.cpp new file mode 100644 index 0000000000..2dbfb2e6e5 --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/common/c_print_results.cpp @@ -0,0 +1,159 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: 
https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av. Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff +*/ + +#include +#include +#include + +/*****************************************************************/ +/****** C _ P R I N T _ R E S U L T S ******/ +/*****************************************************************/ +void +c_print_results(char* name, char class_npb, int n1, int n2, int n3, int niter, double t, + double mops, char* optype, int passed_verification, char* npbversion, + char* compiletime, char* compilerversion, char* libversion, + char* totalthreads, char* cc, char* clink, char* c_lib, char* c_inc, + char* cflags, char* clinkflags, char* rand) +{ + printf("\n\n %s Benchmark Completed\n", name); + printf(" class_npb = %c\n", class_npb); + if((name[0] == 'I') && (name[1] == 'S')) + { + if(n3 == 0) + { + long nn = n1; + if(n2 != 0) + { + nn *= n2; + } + printf(" Size = %12ld\n", nn); /* as in IS */ + } + else + { + printf(" Size = %4dx%4dx%4d\n", n1, n2, n3); + } + } + else + { + char size[16]; + int j; + if((n2 == 0) && (n3 == 0)) + { + if((name[0] == 'E') && (name[1] == 'P')) + { + sprintf(size, "%15.0lf", pow(2.0, n1)); + j = 14; + if(size[j] == '.') + { + size[j] = ' '; + j--; + } + size[j + 1] = '\0'; + printf(" Size = %15s\n", size); + } + else + { + printf(" Size = %12d\n", n1); + } + } + else + { + printf(" Size = %4dx%4dx%4d\n", n1, n2, n3); + } + } + printf(" Total threads = %12s\n", totalthreads); + printf(" Iterations = %12d\n", niter); + printf(" Time in seconds = %12.2f\n", t); + printf(" Mop/s total = %12.2f\n", mops); + printf(" Operation type = 
%24s\n", optype); + if(passed_verification < 0) + { + printf(" Verification = NOT PERFORMED\n"); + } + else if(passed_verification) + { + printf(" Verification = SUCCESSFUL\n"); + } + else + { + printf(" Verification = UNSUCCESSFUL\n"); + } + printf(" Version = %12s\n", npbversion); + printf(" Compile date = %12s\n", compiletime); + printf(" Compiler ver = %12s\n", compilerversion); + printf(" OpenMP version = %12s\n", libversion); + printf("\n Compile options:\n"); + printf(" CC = %s\n", cc); + printf(" CLINK = %s\n", clink); + printf(" C_LIB = %s\n", c_lib); + printf(" C_INC = %s\n", c_inc); + printf(" CFLAGS = %s\n", cflags); + printf(" CLINKFLAGS = %s\n", clinkflags); + printf(" RAND = %s\n", rand); +#ifdef SMP + evalue = getenv("MP_SET_NUMTHREADS"); + printf(" MULTICPUS = %s\n", evalue); +#endif + /* + * printf(" Please send the results of this run to:\n\n"); + * printf(" NPB Development Team\n"); + * printf(" Internet: npb@nas.nasa.gov\n \n"); + * printf(" If email is not available, send this to:\n\n"); + * printf(" MS T27A-1\n"); + * printf(" NASA Ames Research Center\n"); + * printf(" Moffett Field, CA 94035-1000\n\n"); + * printf(" Fax: 650-604-3957\n\n"); + */ + printf("\n\n"); + + printf("----------------------------------------------------------------------\n"); + printf(" NPB-CPP is developed by: \n"); + printf(" Dalvan Griebler\n"); + printf(" Gabriell Araujo (Sequential Porting)\n"); + printf(" Júnior Löff (Parallel Implementation)\n"); + printf("\n"); + printf(" In case of questions or problems, please send an e-mail to us:\n"); + printf(" dalvan.griebler; gabriell.araujo; junior.loff@edu.pucrs.br\n"); + printf("----------------------------------------------------------------------\n"); + printf("\n"); +} diff --git a/projects/rocprofiler-systems/examples/openmp/common/c_randdp.cpp b/projects/rocprofiler-systems/examples/openmp/common/c_randdp.cpp new file mode 100644 index 0000000000..74ba992d36 --- /dev/null +++ 
b/projects/rocprofiler-systems/examples/openmp/common/c_randdp.cpp @@ -0,0 +1,184 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av. Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff +*/ + +#if defined(USE_POW) +# define r23 pow(0.5, 23.0) +# define r46 (r23 * r23) +# define t23 pow(2.0, 23.0) +# define t46 (t23 * t23) +#else +# define r23 \ + (0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * \ + 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5 * 0.5) +# define r46 (r23 * r23) +# define t23 \ + (2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * \ + 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0 * 2.0) +# define t46 (t23 * t23) +#endif + +/* + * --------------------------------------------------------------------- + * + * this routine returns a uniform pseudorandom double precision number in the + * range (0, 1) by using the linear congruential generator + * + * x_{k+1} = a x_k (mod 2^46) + * + * where 0 < x_k < 2^46 and 0 < a < 2^46. this scheme generates 2^44 numbers + * before repeating. the argument A is the same as 'a' in the above formula, + * and X is the same as x_0. A and X must be odd double precision integers + * in the range (1, 2^46). the returned value RANDLC is normalized to be + * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain + * the new seed x_1, so that subsequent calls to RANDLC using the same + * arguments will generate a continuous sequence. + * + * this routine should produce the same results on any computer with at least + * 48 mantissa bits in double precision floating point data. On 64 bit + * systems, double precision should be disabled. 
+ * + * David H. Bailey, October 26, 1990 + * + * --------------------------------------------------------------------- + */ +double +randlc(double* x, double a) +{ + double t1, t2, t3, t4, a1, a2, x1, x2, z; + + /* + * --------------------------------------------------------------------- + * break A into two parts such that A = 2^23 * A1 + A2. + * --------------------------------------------------------------------- + */ + t1 = r23 * a; + a1 = (int) t1; + a2 = a - t23 * a1; + + /* + * --------------------------------------------------------------------- + * break X into two parts such that X = 2^23 * X1 + X2, compute + * Z = A1 * X2 + A2 * X1 (mod 2^23), and then + * X = 2^23 * Z + A2 * X2 (mod 2^46). + * --------------------------------------------------------------------- + */ + t1 = r23 * (*x); + x1 = (int) t1; + x2 = (*x) - t23 * x1; + t1 = a1 * x2 + a2 * x1; + t2 = (int) (r23 * t1); + z = t1 - t23 * t2; + t3 = t23 * z + a2 * x2; + t4 = (int) (r46 * t3); + (*x) = t3 - t46 * t4; + + return (r46 * (*x)); +} + +/* + * --------------------------------------------------------------------- + * + * this routine generates N uniform pseudorandom double precision numbers in + * the range (0, 1) by using the linear congruential generator + * + * x_{k+1} = a x_k (mod 2^46) + * + * where 0 < x_k < 2^46 and 0 < a < 2^46. this scheme generates 2^44 numbers + * before repeating. the argument A is the same as 'a' in the above formula, + * and X is the same as x_0. A and X must be odd double precision integers + * in the range (1, 2^46). the N results are placed in Y and are normalized + * to be between 0 and 1. X is updated to contain the new seed, so that + * subsequent calls to VRANLC using the same arguments will generate a + * continuous sequence. if N is zero, only initialization is performed, and + * the variables X, A and Y are ignored. + * + * this routine is the standard version designed for scalar or RISC systems. 
+ * however, it should produce the same results on any single processor + * computer with at least 48 mantissa bits in double precision floating point + * data. on 64 bit systems, double precision should be disabled. + * + * --------------------------------------------------------------------- + */ +void +vranlc(int n, double* x_seed, double a, double y[]) +{ + int i; + double x, t1, t2, t3, t4, a1, a2, x1, x2, z; + + /* + * --------------------------------------------------------------------- + * break A into two parts such that A = 2^23 * A1 + A2. + * --------------------------------------------------------------------- + */ + t1 = r23 * a; + a1 = (int) t1; + a2 = a - t23 * a1; + x = *x_seed; + + /* + * --------------------------------------------------------------------- + * generate N results. this loop is not vectorizable. + * --------------------------------------------------------------------- + */ + for(i = 0; i < n; i++) + { + /* + * --------------------------------------------------------------------- + * break X into two parts such that X = 2^23 * X1 + X2, compute + * Z = A1 * X2 + A2 * X1 (mod 2^23), and then + * X = 2^23 * Z + A2 * X2 (mod 2^46). 
+ * --------------------------------------------------------------------- + */ + t1 = r23 * x; + x1 = (int) t1; + x2 = x - t23 * x1; + t1 = a1 * x2 + a2 * x1; + t2 = (int) (r23 * t1); + z = t1 - t23 * t2; + t3 = t23 * z + a2 * x2; + t4 = (int) (r46 * t3); + x = t3 - t46 * t4; + y[i] = r46 * x; + } + *x_seed = x; +} \ No newline at end of file diff --git a/projects/rocprofiler-systems/examples/openmp/common/c_timers.cpp b/projects/rocprofiler-systems/examples/openmp/common/c_timers.cpp new file mode 100644 index 0000000000..fbcdb4bfe2 --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/common/c_timers.cpp @@ -0,0 +1,101 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av. Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff +*/ + +#include "wtime.hpp" +#include + +/* prototype */ +void +wtime(double*); + +/*****************************************************************/ +/****** E L A P S E D _ T I M E ******/ +/*****************************************************************/ +double +elapsed_time(void) +{ + double t; + wtime(&t); + return (t); +} + +double start[64], elapsed[64]; + +/*****************************************************************/ +/****** T I M E R _ C L E A R ******/ +/*****************************************************************/ +void +timer_clear(int n) +{ + elapsed[n] = 0.0; +} + +/*****************************************************************/ +/****** T I M E R _ S T A R T ******/ +/*****************************************************************/ +void +timer_start(int n) +{ + start[n] = elapsed_time(); +} + +/*****************************************************************/ +/****** T I M E R _ S T O P ******/ +/*****************************************************************/ +void +timer_stop(int n) +{ + double t, now; + now = elapsed_time(); + t = now - start[n]; + elapsed[n] += t; +} + +/*****************************************************************/ +/****** T I M E R _ R E A D ******/ +/*****************************************************************/ +double +timer_read(int n) +{ + return (elapsed[n]); +} diff --git a/projects/rocprofiler-systems/examples/openmp/common/npb-CPP.hpp 
b/projects/rocprofiler-systems/examples/openmp/common/npb-CPP.hpp new file mode 100644 index 0000000000..cb50667d75 --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/common/npb-CPP.hpp @@ -0,0 +1,124 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av. Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff +*/ + +#include +#include +#include + +typedef int boolean; +typedef struct +{ + double real; + double imag; +} dcomplex; + +#define TRUE 1 +#define FALSE 0 + +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define pow2(a) ((a) * (a)) + +/* old version of the complex number operations */ +#define get_real(c) c.real +#define get_imag(c) c.imag +#define cadd(c, a, b) (c.real = a.real + b.real, c.imag = a.imag + b.imag) +#define csub(c, a, b) (c.real = a.real - b.real, c.imag = a.imag - b.imag) +#define cmul(c, a, b) \ + (c.real = a.real * b.real - a.imag * b.imag, \ + c.imag = a.real * b.imag + a.imag * b.real) +#define crmul(c, a, b) (c.real = a.real * b, c.imag = a.imag * b) + +/* latest version of the complex number operations */ +#define dcomplex_create(r, i) \ + (dcomplex) { r, i } +#define dcomplex_add(a, b) \ + (dcomplex) { (a).real + (b).real, (a).imag + (b).imag } +#define dcomplex_sub(a, b) \ + (dcomplex) { (a).real - (b).real, (a).imag - (b).imag } +#define dcomplex_mul(a, b) \ + (dcomplex) \ + { \ + ((a).real * (b).real) - ((a).imag * (b).imag), \ + ((a).real * (b).imag) + ((a).imag * (b).real) \ + } +#define dcomplex_mul2(a, b) \ + (dcomplex) { (a).real*(b), (a).imag*(b) } +static inline dcomplex +dcomplex_div(dcomplex z1, dcomplex z2) +{ + double a = z1.real; + double b = z1.imag; + double c = z2.real; + double d = z2.imag; + double divisor = c * c + d * d; + double real = (a * c + b * d) / divisor; + double imag = (b * c - 
a * d) / divisor; + dcomplex result = (dcomplex){ real, imag }; + return result; +} +#define dcomplex_div2(a, b) \ + (dcomplex) { (a).real / (b), (a).imag / (b) } +#define dcomplex_abs(x) sqrt(((x).real * (x).real) + ((x).imag * (x).imag)) +#define dconjg(x) \ + (dcomplex) { (x).real, -1.0 * (x).imag } + +extern double +randlc(double*, double); +extern void +vranlc(int, double*, double, double*); +extern void +timer_clear(int); +extern void +timer_start(int); +extern void +timer_stop(int); +extern double +timer_read(int); + +extern void +c_print_results(char* name, char class_npb, int n1, int n2, int n3, int niter, double t, + double mops, char* optype, int passed_verification, char* npbversion, + char* compiletime, char* compilerversion, char* libversion, + char* totalthreads, char* cc, char* clink, char* c_lib, char* c_inc, + char* cflags, char* clinkflags, char* rand); diff --git a/projects/rocprofiler-systems/examples/openmp/common/wtime.cpp b/projects/rocprofiler-systems/examples/openmp/common/wtime.cpp new file mode 100644 index 0000000000..56a9e059a6 --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/common/wtime.cpp @@ -0,0 +1,55 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av. Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff +*/ + +#include "wtime.hpp" +#include + +void +wtime(double* t) +{ + static int sec = -1; + struct timeval tv; + gettimeofday(&tv, 0); + if(sec < 0) sec = tv.tv_sec; + *t = (tv.tv_sec - sec) + 1.0e-6 * tv.tv_usec; +} diff --git a/projects/rocprofiler-systems/examples/openmp/common/wtime.hpp b/projects/rocprofiler-systems/examples/openmp/common/wtime.hpp new file mode 100644 index 0000000000..e8c39e42eb --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/common/wtime.hpp @@ -0,0 +1,54 @@ +/* +MIT License + +Copyright (c) 2021 Parallel Applications Modelling Group - GMAP + GMAP website: https://gmap.pucrs.br + + Pontifical Catholic University of Rio Grande do Sul (PUCRS) + Av. 
Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------------------------------------------------------------------------ + +The original NPB 3.4.1 version was written in Fortran and belongs to: + http://www.nas.nasa.gov/Software/NPB/ + +------------------------------------------------------------------------------ + +The serial C++ version is a translation of the original NPB 3.4.1 +Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER + +Authors of the C++ code: + Dalvan Griebler + Gabriell Araujo + Júnior Löff +*/ + +/* + * C/Fortran interface is different on different machines. + * you may need to tweak this. 
+ */ +#if defined(IBM) +# define wtime wtime +#elif defined(CRAY) +# define wtime WTIME +#else +# define wtime wtime_ +#endif diff --git a/projects/rocprofiler-systems/examples/openmp/target/CMakeLists.txt b/projects/rocprofiler-systems/examples/openmp/target/CMakeLists.txt new file mode 100644 index 0000000000..3dc92ffedf --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/target/CMakeLists.txt @@ -0,0 +1,100 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +if(NOT OMP_TARGET_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} + ENV ROCM_PATH + /opt/rocm + PATHS ${ROCM_PATH} + ENV ROCM_PATH + /opt/rocm + PATH_SUFFIXES bin llvm/bin + ) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(OMP_TARGET_COMPILER + "${amdclangpp_EXECUTABLE}" + CACHE FILEPATH + "OpenMP target compiler" + ) + else() + message(WARNING "OpenMP target compiler not found. Skipping this example.") + return() + endif() +endif() + +project(rocprofiler-systems-example-openmp-target-lib LANGUAGES CXX) + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") + +set(DEFAULT_GPU_TARGETS + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx940" + "gfx941" + "gfx942" + "gfx950" + "gfx1030" + "gfx1010" + "gfx1100" + "gfx1101" + "gfx1102" +) + +set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING "GPU targets to compile for") + +find_package(Threads REQUIRED) + +add_library(openmp-target-lib SHARED) +target_sources(openmp-target-lib PRIVATE library.cpp) +target_link_libraries(openmp-target-lib PUBLIC Threads::Threads) +target_compile_options(openmp-target-lib PRIVATE -fopenmp -ggdb) +target_link_options(openmp-target-lib PUBLIC -fopenmp) + +foreach(_TARGET ${GPU_TARGETS}) + target_compile_options(openmp-target-lib PRIVATE --offload-arch=${_TARGET}) + target_link_options(openmp-target-lib PUBLIC --offload-arch=${_TARGET}) +endforeach() + +message(STATUS "Using OpenMP target compiler: ${OMP_TARGET_COMPILER}") + 
+get_filename_component(OMP_TARGET_COMPILER_DIR ${OMP_TARGET_COMPILER} PATH) +get_filename_component(OMP_TARGET_COMPILER_DIR ${OMP_TARGET_COMPILER_DIR} PATH) + +message(STATUS "Using OpemMP target compiler directory: ${OMP_TARGET_COMPILER_DIR}") + +if(NOT EXISTS ${OMP_TARGET_COMPILER_DIR}/llvm/lib) + message(FATAL_ERROR "${OMP_TARGET_COMPILER_DIR}/llvm/lib does not exist") +endif() +set_target_properties( + openmp-target-lib + PROPERTIES + BUILD_RPATH "${OMP_TARGET_COMPILER_DIR}/llvm/lib:${OMP_TARGET_COMPILER_DIR}/lib" + OUTPUT_NAME "openmp-target" + POSITION_INDEPENDENT_CODE ON +) + +rocprofiler_systems_custom_compilation(TARGET openmp-target-lib + COMPILER ${OMP_TARGET_COMPILER} +) + +add_executable(openmp-target) +target_sources(openmp-target PRIVATE main.cpp) +target_link_libraries(openmp-target PRIVATE openmp-target-lib) +target_compile_options(openmp-target PRIVATE -ggdb) + +set_target_properties( + openmp-target + PROPERTIES + BUILD_RPATH "${OMP_TARGET_COMPILER_DIR}/llvm/lib:${OMP_TARGET_COMPILER_DIR}/lib" + POSITION_INDEPENDENT_CODE ON +) + +rocprofiler_systems_custom_compilation(TARGET openmp-target + COMPILER ${OMP_TARGET_COMPILER} +) diff --git a/projects/rocprofiler-systems/examples/openmp/target/library.cpp b/projects/rocprofiler-systems/examples/openmp/target/library.cpp new file mode 100644 index 0000000000..b676678a1d --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/target/library.cpp @@ -0,0 +1,149 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr float EPS_FLOAT = 1.0e-7f; +constexpr double EPS_DOUBLE = 1.0e-15; + +#pragma omp declare target +template +T +mul(T a, T b) +{ + T c; + c = a * b; + return c; +} +#pragma omp end declare target + +template +void +vmul(T* a, T* b, T* c, int N) +{ +#pragma omp target map(to : a[0 : N], b[0 : N]) map(from : c[0 : N]) +#pragma omp teams distribute parallel for + for(int i = 0; i < N; i++) + { + c[i] = mul(a[i], b[i]); + } +} + +int +run_impl() +{ + std::this_thread::sleep_for(std::chrono::milliseconds{ 50 }); + + constexpr int N = 100000; + constexpr int Nc = N / 100; + int a_i[N], b_i[N], c_i[N], validate_i[N]; + float a_f[N], b_f[N], c_f[N], validate_f[N]; + double a_d[N], b_d[N], c_d[N], validate_d[N]; + int N_errors = 0; + bool flag = false; + +#pragma omp parallel for schedule(dynamic, Nc) + for(int i = 0; i < N; ++i) + { + a_f[i] = a_i[i] = i + 1; + b_f[i] = b_i[i] = i + 2; + a_d[i] = a_i[i]; + b_d[i] = b_i[i]; + validate_i[i] = a_i[i] * b_i[i]; + validate_f[i] = a_f[i] * b_f[i]; + validate_d[i] = a_d[i] * b_d[i]; + } + + for(int i = 0; i < 2; ++i) + { + vmul(a_i, b_i, c_i, N); + vmul(a_f, b_f, c_f, N); + vmul(a_d, b_d, c_d, N); + } + + for(int i = 0; i < N; i++) + { + if(c_i[i] != validate_i[i]) + { + ++N_errors; + // print 1st bad index + if(!flag) + { + printf("First fail: c_i[%d](%d) != validate_i[%d](%d)\n", i, c_i[i], i, + validate_i[i]); + flag = true; + } + } + } + flag = false; + for(int i = 0; i < N; i++) + { + if(fabs(c_f[i] - validate_f[i]) > EPS_FLOAT) + { + ++N_errors; + // print 1st bad index + if(!flag) + { + printf("First fail: c_f[%d](%f) != validate_f[%d](%f)\n", i, + static_cast(c_f[i]), i, + static_cast(validate_f[i])); + flag = true; + } + } + } + flag = false; + for(int i = 0; i < N; i++) + { + if(fabs(c_d[i] - validate_d[i]) > EPS_DOUBLE) + { + ++N_errors; + // print 1st bad index + if(!flag) + { + printf("First fail: c_d[%d](%f) != 
validate_d[%d](%f)\n", i, c_d[i], i, + validate_d[i]); + flag = true; + } + } + } + + return N_errors; +} + +int +run() +{ +#pragma omp parallel + { + run_impl(); + } + + return 0; +} diff --git a/projects/rocprofiler-systems/examples/openmp/target/main.cpp b/projects/rocprofiler-systems/examples/openmp/target/main.cpp new file mode 100644 index 0000000000..081c03b9b9 --- /dev/null +++ b/projects/rocprofiler-systems/examples/openmp/target/main.cpp @@ -0,0 +1,52 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include +#include +#include +#include +#include +#include +#include +#include + +extern int +run(); + +int +main() +{ + auto N_errors = run(); + auto _ec = EXIT_SUCCESS; + if(N_errors == 0) + { + printf("Success\n"); + } + else + { + printf("Total %d failures\n", N_errors); + printf("Fail\n"); + _ec = EXIT_FAILURE; + } + + return _ec; +} diff --git a/projects/rocprofiler-systems/examples/parallel-overhead/CMakeLists.txt b/projects/rocprofiler-systems/examples/parallel-overhead/CMakeLists.txt new file mode 100644 index 0000000000..c28b326c93 --- /dev/null +++ b/projects/rocprofiler-systems/examples/parallel-overhead/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-parallel-overhead-example LANGUAGES CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +set(CMAKE_BUILD_TYPE "Release") +find_package(Threads REQUIRED) + +add_library(parallel-overhead-compile-options INTERFACE) +target_compile_options(parallel-overhead-compile-options INTERFACE -g) + +add_executable(parallel-overhead parallel-overhead.cpp) +target_link_libraries( + parallel-overhead + PRIVATE Threads::Threads parallel-overhead-compile-options +) + +add_executable(parallel-overhead-locks parallel-overhead.cpp) +target_link_libraries( + parallel-overhead-locks + PRIVATE Threads::Threads parallel-overhead-compile-options +) +target_compile_definitions(parallel-overhead-locks PRIVATE USE_LOCKS=1) + +if(ROCPROFSYS_INSTALL_EXAMPLES) + install( + TARGETS parallel-overhead parallel-overhead-locks + DESTINATION bin + COMPONENT rocprofiler-systems-examples + ) +endif() diff --git a/projects/rocprofiler-systems/examples/parallel-overhead/parallel-overhead.cpp b/projects/rocprofiler-systems/examples/parallel-overhead/parallel-overhead.cpp new file mode 
100644 index 0000000000..1eca62b4a5 --- /dev/null +++ b/projects/rocprofiler-systems/examples/parallel-overhead/parallel-overhead.cpp @@ -0,0 +1,119 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(USE_LOCKS) +# define USE_LOCKS 0 +#endif + +#if USE_LOCKS > 0 +# include +using auto_lock_t = std::unique_lock; +long total = 0; +long lock_count = 0; +std::mutex mtx{}; +#else +std::atomic total{ 0 }; +long lock_count = 0; +#endif + +long +fib(long n) __attribute__((noinline)); + +void +run(size_t nitr, long) __attribute__((noinline)); + +long +fib(long n) +{ + return (n < 2) ? n : fib(n - 1) + fib(n - 2); +} + +void +run(size_t nitr, long n) +{ + static std::atomic _tids{ 0 }; + auto _tid = ++_tids; + + std::default_random_engine eng(std::random_device{}() * (100 + _tid)); + std::uniform_int_distribution distr{ n - 2, n + 2 }; + + auto _get_n = [&]() { return distr(eng); }; + + printf("[%i] number of iterations: %zu\n", _tid, nitr); + +#if USE_LOCKS > 0 + for(size_t i = 0; i < nitr; ++i) + { + auto _v = fib(_get_n()); + auto_lock_t _lk{ mtx }; + total += _v; + ++lock_count; + } +#else + long local = 0; + for(size_t i = 0; i < nitr; ++i) + { + local += fib(_get_n()); + } + total += local; +#endif +} + +int +main(int argc, char** argv) +{ + std::string _name = argv[0]; + auto _pos = _name.find_last_of('/'); + if(_pos != std::string::npos) _name = _name.substr(_pos + 1); + + size_t nthread = std::min(16, std::thread::hardware_concurrency()); + size_t nitr = 50000; + long nfib = 10; + + if(argc > 1) nfib = atol(argv[1]); + if(argc > 2) nthread = atol(argv[2]); + if(argc > 3) nitr = atol(argv[3]); + + printf("\n[%s] Threads: %zu\n[%s] Iterations: %zu\n[%s] fibonacci(%li)...\n", + _name.c_str(), nthread, _name.c_str(), nitr, _name.c_str(), nfib); + + bool run_on_main_thread = (USE_LOCKS == 0); + auto nwait = nthread + ((run_on_main_thread) ? 
1 : 0); + + pthread_barrier_t _barrier; + pthread_barrier_init(&_barrier, nullptr, nwait); + + auto _run = [&_barrier](size_t nitr, long n) { + pthread_barrier_wait(&_barrier); + run(nitr, n); + }; + + std::vector threads{}; + for(size_t i = 0; i < nthread; ++i) + { + threads.emplace_back(_run, nitr, nfib); + } + + if(run_on_main_thread) + { + _run(nitr, nfib); + } + + for(auto& itr : threads) + itr.join(); + + pthread_barrier_destroy(&_barrier); + + printf("[%s] fibonacci(%li) x %lu = %li\n", _name.c_str(), nfib, nthread, + static_cast(total)); + printf("[%s] number of mutex locks = %li\n", _name.c_str(), lock_count); + + return 0; +} diff --git a/projects/rocprofiler-systems/examples/python/CMakeLists.txt b/projects/rocprofiler-systems/examples/python/CMakeLists.txt new file mode 100644 index 0000000000..d88c0b597b --- /dev/null +++ b/projects/rocprofiler-systems/examples/python/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-python) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +set(PYTHON_FILES + builtin.py + external.py + source.py + noprofile.py + fill.py +) + +find_package(Python3 COMPONENTS Interpreter) + +if(Python3_FOUND) + set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}") + + foreach(_FILE ${PYTHON_FILES}) + configure_file( + ${PROJECT_SOURCE_DIR}/${_FILE} + ${PROJECT_BINARY_DIR}/${_FILE} + @ONLY + ) + + if(ROCPROFSYS_INSTALL_EXAMPLES) + install( + PROGRAMS ${PROJECT_BINARY_DIR}/${_FILE} + DESTINATION bin + COMPONENT rocprofiler-systems-examples + ) + endif() + endforeach() +endif() diff --git a/projects/rocprofiler-systems/examples/python/builtin.py b/projects/rocprofiler-systems/examples/python/builtin.py new file mode 100755 index 0000000000..19c71f1a5a --- /dev/null +++ 
b/projects/rocprofiler-systems/examples/python/builtin.py @@ -0,0 +1,48 @@ +#!@PYTHON_EXECUTABLE@ + +import os +import sys +import random + +_prefix = "" + + +def fib(n): + return n if n < 2 else (fib(n - 1) + fib(n - 2)) + + +def inefficient(n): + print(f"[{_prefix}] ... running inefficient({n})") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _arr = [random.random() for _ in range(_len)] + _sum = sum(_arr) + print(f"[{_prefix}] ... sum of {_len} random elements: {_sum}") + return _sum + + +@profile +def run(n): + _ret = 0 + _ret += fib(n) + _ret += inefficient(n) + return _ret + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=3) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=20) + args = parser.parse_args() + + _prefix = os.path.basename(__file__) + print(f"[{_prefix}] Executing {args.num_iterations} iterations...\n") + for i in range(args.num_iterations): + ans = run(args.value) + print(f"[{_prefix}] [{i}] result of run({args.value}) = {ans}\n") diff --git a/projects/rocprofiler-systems/examples/python/external.py b/projects/rocprofiler-systems/examples/python/external.py new file mode 100755 index 0000000000..99879d329b --- /dev/null +++ b/projects/rocprofiler-systems/examples/python/external.py @@ -0,0 +1,47 @@ +#!@PYTHON_EXECUTABLE@ + +import os +import sys +import random + +_prefix = "" + + +def fib(n): + return n if n < 2 else (fib(n - 1) + fib(n - 2)) + + +def inefficient(n): + print(f"[{_prefix}] ... running inefficient({n})") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _arr = [random.random() for _ in range(_len)] + _sum = sum(_arr) + print(f"[{_prefix}] ... 
sum of {_len} random elements: {_sum}") + return _sum + + +def run(n): + _ret = 0 + _ret += fib(n) + _ret += inefficient(n) + return _ret + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=3) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=20) + args = parser.parse_args() + + _prefix = os.path.basename(__file__) + print(f"[{_prefix}] Executing {args.num_iterations} iterations...\n") + for i in range(args.num_iterations): + ans = run(args.value) + print(f"[{_prefix}] [{i}] result of run({args.value}) = {ans}\n") diff --git a/projects/rocprofiler-systems/examples/python/fill.py b/projects/rocprofiler-systems/examples/python/fill.py new file mode 100755 index 0000000000..c1d5de1bbf --- /dev/null +++ b/projects/rocprofiler-systems/examples/python/fill.py @@ -0,0 +1,39 @@ +#!@PYTHON_EXECUTABLE@ + +import os +import sys +import time +import rocprofsys +from rocprofsys.user import region as omni_user_region +from rocprofsys.profiler import config as omni_config + +_prefix = "" + + +def loop(n): + pass + + +@rocprofsys.profile() +def run(i, n, v): + for l in range(n * n): + loop(v + l) + return v + (n * n) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=100) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=10) + args = parser.parse_args() + + omni_config.include_args = True + _prefix = os.path.basename(__file__) + print(f"[{_prefix}] Executing {args.num_iterations} iterations...\n") + ans = 0 + for i in range(args.num_iterations): + beg = ans + ans = run(i, args.value, beg) + print(f"[{_prefix}] [{i}] result of run({args.value}, {beg}) = {ans}") diff --git a/projects/rocprofiler-systems/examples/python/noprofile.py b/projects/rocprofiler-systems/examples/python/noprofile.py 
new file mode 100755 index 0000000000..f2566f23ef --- /dev/null +++ b/projects/rocprofiler-systems/examples/python/noprofile.py @@ -0,0 +1,50 @@ +#!@PYTHON_EXECUTABLE@ + +import os +import sys +import random + +_prefix = "" + + +@noprofile +def fib(n): + return n if n < 2 else (fib(n - 1) + fib(n - 2)) + + +@noprofile +def inefficient(n): + print(f"[{_prefix}] ... running inefficient({n})") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _arr = [random.random() for _ in range(_len)] + _sum = sum(_arr) + print(f"[{_prefix}] ... sum of {_len} random elements: {_sum}") + return _sum + + +@profile +def run(n): + _ret = 0 + _ret += fib(n) + _ret += inefficient(n) + return _ret + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=3) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=20) + args = parser.parse_args() + + _prefix = os.path.basename(__file__) + print(f"[{_prefix}] Executing {args.num_iterations} iterations...\n") + for i in range(args.num_iterations): + ans = run(args.value) + print(f"[{_prefix}] [{i}] result of run({args.value}) = {ans}\n") diff --git a/projects/rocprofiler-systems/examples/python/source-numpy.py b/projects/rocprofiler-systems/examples/python/source-numpy.py new file mode 100755 index 0000000000..c008749241 --- /dev/null +++ b/projects/rocprofiler-systems/examples/python/source-numpy.py @@ -0,0 +1,83 @@ +#!@PYTHON_EXECUTABLE@ + +import os +import sys +import time +import rocprofsys +from rocprofsys.user import region as omni_user_region + +_prefix = "" + + +def fib(n): + return n if n < 2 else (fib(n - 1) + fib(n - 2)) + + +try: + import numpy as np + + def inefficient(n): + print(f"[{_prefix}] ... 
running inefficient({n}) (1)") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _ret = np.random.rand(_len).sum() + print(f"[{_prefix}] ... sum of {_len} random elements: {_ret}") + return _ret + +except ImportError as e: + print(f"ImportError: {e}") + import random + + def _sum(arr): + print(f"---- in _sum") + return sum(arr) + + def inefficient(n): + print(f"[{_prefix}] ... running inefficient({n})") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _arr = [random.random() for _ in range(_len)] + _ret = _sum(_arr) + print(f"[{_prefix}] ... sum of {_len} random elements: {_ret}") + return _ret + + +@rocprofsys.profile() +def run(n): + _ret = 0 + _ret += fib(n) + _ret += inefficient(n) + return _ret + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=3) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=20) + parser.add_argument( + "-s", + "--stop-profile", + help="Stop tracing after given iterations", + type=int, + default=0, + ) + args = parser.parse_args() + + _prefix = os.path.basename(__file__) + print(f"[{_prefix}] Executing {args.num_iterations} iterations...\n") + for i in range(args.num_iterations): + with omni_user_region(f"main_loop"): + if args.stop_profile > 0 and i == args.stop_profile: + rocprofsys.user.stop_trace() + ans = run(args.value) + print(f"[{_prefix}] [{i}] result of run({args.value}) = {ans}\n") diff --git a/projects/rocprofiler-systems/examples/python/source.py b/projects/rocprofiler-systems/examples/python/source.py new file mode 100755 index 0000000000..3ec7b9445b --- /dev/null +++ b/projects/rocprofiler-systems/examples/python/source.py @@ -0,0 +1,66 @@ +#!@PYTHON_EXECUTABLE@ + +import os +import sys +import time +import rocprofsys +from rocprofsys.user import region as omni_user_region +import random + +_prefix = 
"" + + +def fib(n): + return n if n < 2 else (fib(n - 1) + fib(n - 2)) + + +def _sum(arr): + print(f"---- in _sum") + return sum(arr) + + +def inefficient(n): + print(f"[{_prefix}] ... running inefficient({n})") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _arr = [random.random() for _ in range(_len)] + _ret = _sum(_arr) + print(f"[{_prefix}] ... sum of {_len} random elements: {_ret}") + return _ret + + +@rocprofsys.profile() +def run(n): + _ret = 0 + _ret += fib(n) + _ret += inefficient(n) + return _ret + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=3) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=20) + parser.add_argument( + "-s", + "--stop-profile", + help="Stop tracing after given iterations", + type=int, + default=0, + ) + args = parser.parse_args() + + _prefix = os.path.basename(__file__) + print(f"[{_prefix}] Executing {args.num_iterations} iterations...\n") + for i in range(args.num_iterations): + with omni_user_region(f"main_loop"): + if args.stop_profile > 0 and i == args.stop_profile: + rocprofsys.user.stop_trace() + ans = run(args.value) + print(f"[{_prefix}] [{i}] result of run({args.value}) = {ans}\n") diff --git a/projects/rocprofiler-systems/examples/rccl/CMakeLists.txt b/projects/rocprofiler-systems/examples/rccl/CMakeLists.txt new file mode 100644 index 0000000000..6546183efb --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/CMakeLists.txt @@ -0,0 +1,123 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-rccl-example LANGUAGES CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +function(rccl_message _MSG_TYPE) + if( + "${CMAKE_PROJECT_NAME}" STREQUAL "rocprofiler-systems" + AND "$ENV{ROCPROFSYS_CI}" + AND "${_MSG_TYPE}" MATCHES "WARNING" + ) + set(_MSG_TYPE STATUS) # don't generate warnings during CI + endif() + if("${CMAKE_PROJECT_NAME}" STREQUAL "rocprofiler-systems") + rocprofiler_systems_message(${_MSG_TYPE} ${ARGN}) + else() + message(${_MSG_TYPE} ${ARGN}) + endif() +endfunction() + +find_package(hip HINTS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm) + +if(NOT hip_FOUND) + rccl_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. 
Missing HIP...") + return() +endif() + +if( + "${CMAKE_PROJECT_NAME}" STREQUAL "rocprofiler-systems" + AND ("$ENV{ROCPROFSYS_CI}" OR ROCPROFSYS_CI OR ROCPROFSYS_BUILD_CI) +) + # avoid generating warning in CI + find_package(rccl HINTS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm QUIET) +else() + find_package(rccl HINTS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm) +endif() + +if(NOT rccl_FOUND) + rccl_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. Missing RCCL...") + return() +endif() + +if(hip_FOUND AND rccl_FOUND) + # Use existing rccl-tests source in the project binary directory + set(rccl-tests_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/rccl-tests") + set(rccl-tests_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/rccl-tests") + file(COPY ${rccl-tests_SOURCE_DIR}/ DESTINATION ${rccl-tests_BUILD_DIR}) + + if(NOT EXISTS "${rccl-tests_SOURCE_DIR}") + rccl_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. Missing rccl-tests source at ${rccl-tests_SOURCE_DIR}") + return() + endif() + + get_filename_component(rccl_ROOT_DIR "${rccl_INCLUDE_DIR}" DIRECTORY) + + rccl_message(STATUS "Building rccl-tests...") + execute_process( + COMMAND make HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR} + WORKING_DIRECTORY ${rccl-tests_BUILD_DIR} + RESULT_VARIABLE _RCCL_BUILD_RET + ERROR_VARIABLE _RCCL_BUILD_ERR + OUTPUT_VARIABLE _RCCL_BUILD_OUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE + ) + + if(NOT _RCCL_BUILD_RET EQUAL 0) + rccl_message(${_RCCL_BUILD_OUT}) + rccl_message(AUTHOR_WARNING "Failed to build rccl-tests: ${_RCCL_BUILD_ERR}") + else() + file(GLOB RCCL_TEST_EXECUTABLES ${rccl-tests_BUILD_DIR}/build/*_perf) + set(_RCCL_TEST_TARGETS) + + foreach(_EXE ${RCCL_TEST_EXECUTABLES}) + get_filename_component(_EXE_NAME "${_EXE}" NAME) + execute_process( + COMMAND + ${CMAKE_COMMAND} -E copy ${_EXE} + ${CMAKE_CURRENT_BINARY_DIR}/${_EXE_NAME} + ) + add_executable(rccl-tests::${_EXE_NAME} IMPORTED GLOBAL) + set_property( + TARGET rccl-tests::${_EXE_NAME} + PROPERTY 
IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${_EXE_NAME} + ) + list(APPEND _RCCL_TEST_TARGETS "rccl-tests::${_EXE_NAME}") + endforeach() + + set(RCCL_TEST_TARGETS "${_RCCL_TEST_TARGETS}" CACHE INTERNAL "rccl-test targets") + endif() +endif() diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/Makefile b/projects/rocprofiler-systems/examples/rccl/rccl-tests/Makefile new file mode 100644 index 0000000000..f652b78a99 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/Makefile @@ -0,0 +1,23 @@ +# +# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +# +# See LICENCE.txt for license information +# + +BUILDDIR ?= build +override BUILDDIR := $(abspath $(BUILDDIR)) + +.PHONY: all clean + +default: src.build + +TARGETS=src + +all: ${TARGETS:%=%.build} +clean: ${TARGETS:%=%.clean} + +%.build: + ${MAKE} -C $* build BUILDDIR=${BUILDDIR} + +%.clean: + ${MAKE} -C $* clean BUILDDIR=${BUILDDIR} diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/Makefile b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/Makefile new file mode 100644 index 0000000000..ad6bf4ab7a --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/Makefile @@ -0,0 +1,174 @@ +# +# Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved. +# Modifications are Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# See LICENSE.txt for license information +# + +ROCM_PATH ?= /opt/rocm +MPI_HOME ?= /usr/lib/x86_64-linux-gnu +PREFIX ?= /usr/local +VERBOSE ?= 0 +DEBUG ?= 0 +NCCL_HOME ?= "" +CUSTOM_RCCL_LIB ?= "" + +HIPCC ?= $(ROCM_PATH)/bin/amdclang++ +HIPCONFIG = $(ROCM_PATH)/bin/hipconfig +CXX = $(HIPCC) + +HIPCUFLAGS := -std=c++14 +LDFLAGS := +HIPLDFLAGS := + +HIP_VERSION = $(strip $(shell which $(HIPCONFIG) >/dev/null && $(HIPCONFIG) --version)) +HIP_MAJOR = $(shell echo $(HIP_VERSION) | cut -d "." -f 1) +HIP_MINOR = $(shell echo $(HIP_VERSION) | cut -d "." 
-f 2) + +# Better define GPU_TARGETS in your environment to the minimal set +# of archs to reduce compile time. +# Currently, supports gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 +ifndef GPU_TARGETS +GPU_TARGETS = gfx906 gfx908 gfx90a + ifeq ($(shell test "0$(HIP_MAJOR)" -eq 6; echo $$?),0) + # Include gfx942 support if we're using ROCm 6.0 or above + GPU_TARGETS += gfx942 + ifeq ($(shell test "0$(HIP_MINOR)" -ge 5; echo $$?),0) + # Include gfx950 support if we're using ROCm 6.5 or above + GPU_TARGETS += gfx950 + endif + endif +GPU_TARGETS += gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201 +endif + +GPU_TARGETS_FLAGS = $(foreach target,$(GPU_TARGETS),"--offload-arch=$(target)") + +#CUDA_VERSION = $(strip $(shell which $(NVCC) >/dev/null && $(NVCC) --version | grep release | sed 's/.*release //' | sed 's/\,.*//')) +#CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1) +#CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2) +# +## Better define NVCC_GENCODE in your environment to the minimal set +## of archs to reduce compile time. 
+#ifeq ($(shell test "0$(CUDA_MAJOR)" -eq 12 -a "0$(CUDA_MINOR)" -ge 8 -o "0$(CUDA_MAJOR)" -ge 13; echo $$?),0) +## Include Blackwell support if we're using CUDA12.8 or above +#NVCC_GENCODE ?= -gencode=arch=compute_80,code=sm_80 \ +# -gencode=arch=compute_90,code=sm_90 \ +# -gencode=arch=compute_100,code=sm_100 \ +# -gencode=arch=compute_120,code=sm_120 \ +# -gencode=arch=compute_120,code=compute_120 +#else ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 12; echo $$?),0) +#NVCC_GENCODE ?= -gencode=arch=compute_60,code=sm_60 \ +# -gencode=arch=compute_61,code=sm_61 \ +# -gencode=arch=compute_70,code=sm_70 \ +# -gencode=arch=compute_80,code=sm_80 \ +# -gencode=arch=compute_90,code=sm_90 \ +# -gencode=arch=compute_90,code=compute_90 +#else ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 11; echo $$?),0) +#NVCC_GENCODE ?= -gencode=arch=compute_60,code=sm_60 \ +# -gencode=arch=compute_61,code=sm_61 \ +# -gencode=arch=compute_70,code=sm_70 \ +# -gencode=arch=compute_80,code=sm_80 \ +# -gencode=arch=compute_80,code=compute_80 +#else +#NVCC_GENCODE ?= -gencode=arch=compute_35,code=sm_35 \ +# -gencode=arch=compute_50,code=sm_50 \ +# -gencode=arch=compute_60,code=sm_60 \ +# -gencode=arch=compute_61,code=sm_61 \ +# -gencode=arch=compute_70,code=sm_70 \ +# -gencode=arch=compute_70,code=compute_70 +#endif + +ifneq ($(NCCL_HOME), "") +HIPCUFLAGS += -I$(NCCL_HOME)/ -I$(NCCL_HOME)/include +HIPLDFLAGS += -Wl,-rpath,$(NCCL_HOME) -L$(NCCL_HOME) -L$(NCCL_HOME)/lib +endif + +HIPCUFLAGS += -I$(ROCM_PATH)/include +HIPCUFLAGS += -I$(ROCM_PATH)/include/hip +HIPCUFLAGS += -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS) +LDFLAGS += -L$(ROCM_PATH)/lib -lhsa-runtime64 -lrt +ifneq ($(CUSTOM_RCCL_LIB), "") +HIPLDFLAGS += -L$(CUSTOM_RCCL_LIB) +endif +HIPLDFLAGS += -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64 -lstdc++ -lrt -pthread + +ifeq ($(DEBUG), 0) +HIPCUFLAGS += -O3 +else +HIPCUFLAGS += -O0 -g -ggdb3 +endif + +ifeq ($(VERBOSE), 0) +.SILENT: +endif + +.PHONY: build clean + +BUILDDIR ?= 
../build +HIPIFY_DIR ?= $(BUILDDIR)/hipify + +.PRECIOUS: $(HIPIFY_DIR)/%.cpp $(HIPIFY_DIR)/%.h + +ifeq ($(MPI), 1) +HIPCUFLAGS += -DMPI_SUPPORT -I${MPI_HOME}/include -I${MPI_HOME}/include/openmpi -I${MPI_HOME}/openmpi/include -I${MPI_HOME}/openmpi/include/openmpi +HIPLDFLAGS += -L${MPI_HOME}/lib -L${MPI_HOME}/openmpi/lib -lmpi +else ifeq ($(MPICH), 1) +HIPCUFLAGS += -DMPI_SUPPORT -I${MPI_HOME}/include -I${MPI_HOME}/mpich/include -I/usr/include/x86_64-linux-gnu/mpich +HIPLDFLAGS += -L${MPI_HOME}/lib -L${MPI_HOME}/mpich/lib -lmpich +endif + +LIBRARIES += rccl +HIPLDFLAGS += $(LIBRARIES:%=-l%) + +DST_DIR := $(BUILDDIR) +SRC_FILES := $(wildcard *.cpp) +OBJ_FILES := $(SRC_FILES:%.cpp=${DST_DIR}/%.o) +BIN_FILES_LIST := all_reduce all_gather broadcast reduce_scatter reduce alltoall scatter gather sendrecv alltoallv +BIN_FILES := $(BIN_FILES_LIST:%=${DST_DIR}/%_perf) + +GIT_VERSION_FILE := ${DST_DIR}/src/git_version.cpp +GIT_REV := $(shell git log --pretty=format:'%h' -n 1) +GIT_DIFF := $(shell git diff --quiet --exit-code || echo +) +GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD) + +build: ${BIN_FILES} + +clean: + rm -rf ${DST_DIR} + +TEST_VERIFIABLE_SRCDIR := ../verifiable +TEST_VERIFIABLE_BUILDDIR := $(BUILDDIR)/verifiable +include ../verifiable/verifiable.mk + +# Rule to create git_version.cpp +$(GIT_VERSION_FILE): + @mkdir -p ${DST_DIR}/src + @echo 'const char* rcclTestsGitHash = "$(GIT_BRANCH):$(GIT_REV)$(GIT_DIFF)";' > $@ + +${HIPIFY_DIR}/%.cpp: %.cpp + @printf "Hipifying %-35s > %s\n" $< $@ + @mkdir -p ${HIPIFY_DIR} + hipify-perl -quiet-warnings $< > $@ + +${HIPIFY_DIR}/%.h: %.h + @printf "Hipifying %-35s > %s\n" $< $@ + @mkdir -p ${HIPIFY_DIR} + hipify-perl -quiet-warnings $< > $@ + +${DST_DIR}/%.o: ${HIPIFY_DIR}/%.cpp ${HIPIFY_DIR}/common.h $(TEST_VERIFIABLE_HDRS) $(GIT_VERSION_FILE) + @printf "Compiling %-35s > %s\n" $< $@ + @mkdir -p ${DST_DIR} + echo "$(HIPCC) $(HIPCUFLAGS) -I. -c -o $@ $<" + $(HIPCC) $(HIPCUFLAGS) -I. 
-c -o $@ $< + +${DST_DIR}/timer.o: timer.cc timer.h + @printf "Compiling %-35s > %s\n" $< $@ + @mkdir -p ${DST_DIR} + $(CXX) $(CXXFLAGS) -o $@ -c timer.cc + +${DST_DIR}/%_perf:${DST_DIR}/%.o ${DST_DIR}/common.o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_OBJS) $(DST_DIR)/src/git_version.cpp + @printf "Linking %-35s > %s\n" $< $@ + @mkdir -p ${DST_DIR} + echo "$(HIPCC) -o $@ $^ $(HIPLDFLAGS)" + $(HIPCC) -o $@ $^ $(HIPLDFLAGS) + diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/all_gather.cpp b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/all_gather.cpp new file mode 100644 index 0000000000..a8cc9a1614 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/all_gather.cpp @@ -0,0 +1,110 @@ +/************************************************************************* + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "common.h" +#include "cuda_runtime.h" + +void +AllGatherGetCollByteCount(size_t* sendcount, size_t* recvcount, size_t* paramcount, + size_t* sendInplaceOffset, size_t* recvInplaceOffset, + size_t count, size_t eltSize, int nranks) +{ + size_t base = (count / nranks) & -(16 / eltSize); + *sendcount = base; + *recvcount = base * nranks; + *sendInplaceOffset = base; + *recvInplaceOffset = 0; + *paramcount = base; +} + +testResult_t +AllGatherInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int rep, int in_place) +{ + size_t sendcount = args->sendBytes / wordSize(type); + size_t recvcount = args->expectedBytes / wordSize(type); + int nranks = args->nProcs * args->nThreads * args->nGpus; + + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaSetDevice(args->gpus[i])); + int rank = ((args->proc * args->nThreads + args->thread) * 
args->nGpus + i); + CUDACHECK(cudaMemset(args->recvbuffs[i], 0, args->expectedBytes)); + void* data = in_place ? ((char*) args->recvbuffs[i]) + rank * args->sendBytes + : args->sendbuffs[i]; + TESTCHECK(InitData(data, sendcount, 0, type, ncclSum, 33 * rep + rank, 1, 0)); + for(int j = 0; j < nranks; j++) + { + TESTCHECK(InitData((char*) args->expected[i] + args->sendBytes * j, sendcount, + 0, type, ncclSum, 33 * rep + j, 1, 0)); + } + CUDACHECK(cudaDeviceSynchronize()); + } + return testSuccess; +} + +void +AllGatherGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, + int nranks) +{ + double baseBw = (double) (count * typesize * nranks) / 1.0E9 / sec; + + *algBw = baseBw; + double factor = ((double) (nranks - 1)) / ((double) nranks); + *busBw = baseBw * factor; +} + +testResult_t +AllGatherRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, + ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) +{ + NCCLCHECK(ncclAllGather(sendbuff, recvbuff, count, type, comm, stream)); + return testSuccess; +} + +struct testColl allGatherTest = { "AllGather", AllGatherGetCollByteCount, + AllGatherInitData, AllGatherGetBw, AllGatherRunColl }; + +void +AllGatherGetBuffSize(size_t* sendcount, size_t* recvcount, size_t count, int nranks) +{ + size_t paramcount, sendInplaceOffset, recvInplaceOffset; + AllGatherGetCollByteCount(sendcount, recvcount, ¶mcount, &sendInplaceOffset, + &recvInplaceOffset, count, /*eltSize=*/1, nranks); +} + +testResult_t +AllGatherRunTest(struct threadArgs* args, int root, ncclDataType_t type, + const char* typeName, ncclRedOp_t op, const char* opName) +{ + args->collTest = &allGatherTest; + ncclDataType_t* run_types; + const char** run_typenames; + int type_count; + + if((int) type != -1) + { + type_count = 1; + run_types = &type; + run_typenames = &typeName; + } + else + { + type_count = test_typenum; + run_types = test_types; + run_typenames = test_typenames; + } + + for(int i = 0; i < type_count; 
i++) + { + TESTCHECK( + TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t) 0, "none", -1)); + } + return testSuccess; +} + +struct testEngine ncclTestEngine = { AllGatherGetBuffSize, AllGatherRunTest }; diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/all_reduce.cpp b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/all_reduce.cpp new file mode 100644 index 0000000000..551d2eb8b6 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/all_reduce.cpp @@ -0,0 +1,126 @@ +/************************************************************************* + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "common.h" +#include "cuda_runtime.h" + +void +AllReduceGetCollByteCount(size_t* sendcount, size_t* recvcount, size_t* paramcount, + size_t* sendInplaceOffset, size_t* recvInplaceOffset, + size_t count, size_t eltSize, int nranks) +{ + *sendcount = count; + *recvcount = count; + *sendInplaceOffset = 0; + *recvInplaceOffset = 0; + *paramcount = *sendcount; +} + +testResult_t +AllReduceInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int rep, int in_place) +{ + size_t sendcount = args->sendBytes / wordSize(type); + size_t recvcount = args->expectedBytes / wordSize(type); + int nranks = args->nProcs * args->nThreads * args->nGpus; + + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaSetDevice(args->gpus[i])); + int rank = ((args->proc * args->nThreads + args->thread) * args->nGpus + i); + CUDACHECK(cudaMemset(args->recvbuffs[i], 0, args->expectedBytes)); + void* data = in_place ? 
args->recvbuffs[i] : args->sendbuffs[i]; + TESTCHECK(InitData(data, sendcount, 0, type, op, rep, nranks, rank)); + TESTCHECK(InitDataReduce(args->expected[i], recvcount, 0, type, op, rep, nranks)); + CUDACHECK(cudaDeviceSynchronize()); + } + return testSuccess; +} + +void +AllReduceGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, + int nranks) +{ + double baseBw = (double) (count * typesize) / 1.0E9 / sec; + + *algBw = baseBw; + double factor = ((double) (2 * (nranks - 1))) / ((double) nranks); + *busBw = baseBw * factor; +} + +testResult_t +AllReduceRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, + ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) +{ + NCCLCHECK(ncclAllReduce(sendbuff, recvbuff, count, type, op, comm, stream)); + return testSuccess; +} + +struct testColl allReduceTest = { "AllReduce", AllReduceGetCollByteCount, + AllReduceInitData, AllReduceGetBw, AllReduceRunColl }; + +void +AllReduceGetBuffSize(size_t* sendcount, size_t* recvcount, size_t count, int nranks) +{ + size_t paramcount, sendInplaceOffset, recvInplaceOffset; + AllReduceGetCollByteCount(sendcount, recvcount, ¶mcount, &sendInplaceOffset, + &recvInplaceOffset, count, /*eltSize=*/1, nranks); +} + +testResult_t +AllReduceRunTest(struct threadArgs* args, int root, ncclDataType_t type, + const char* typeName, ncclRedOp_t op, const char* opName) +{ + args->collTest = &allReduceTest; + ncclDataType_t* run_types; + ncclRedOp_t* run_ops; + const char ** run_typenames, **run_opnames; + int type_count, op_count; + + if((int) type != -1) + { + type_count = 1; + run_types = &type; + run_typenames = &typeName; + } + else + { + type_count = test_typenum; + run_types = test_types; + run_typenames = test_typenames; + } + + if((int) op != -1) + { + op_count = 1; + run_ops = &op; + run_opnames = &opName; + } + else + { + op_count = test_opnum; + run_ops = test_ops; + run_opnames = test_opnames; + } + + for(int i = 0; i < type_count; i++) + 
{ + for(int j = 0; j < op_count; j++) + { +#if defined(RCCL_FLOAT8) + if((run_types[i] == ncclFp8E4M3 || run_types[i] == ncclFp8E5M2) && + run_ops[j] == ncclProd) + continue; +#endif + TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], run_ops[j], + run_opnames[j], -1)); + } + } + return testSuccess; +} + +struct testEngine ncclTestEngine = { AllReduceGetBuffSize, AllReduceRunTest }; diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/alltoall.cpp b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/alltoall.cpp new file mode 100644 index 0000000000..37c9d19052 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/alltoall.cpp @@ -0,0 +1,112 @@ +/************************************************************************* + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "common.h" +#include "cuda_runtime.h" + +void +AlltoAllGetCollByteCount(size_t* sendcount, size_t* recvcount, size_t* paramcount, + size_t* sendInplaceOffset, size_t* recvInplaceOffset, + size_t count, size_t eltSize, int nranks) +{ + *paramcount = (count / nranks) & -(16 / eltSize); + *sendcount = nranks * (*paramcount); + *recvcount = *sendcount; + *sendInplaceOffset = 0; + *recvInplaceOffset = 0; +} + +testResult_t +AlltoAllInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int rep, int in_place) +{ + size_t sendcount = args->sendBytes / wordSize(type); + size_t recvcount = args->expectedBytes / wordSize(type); + int nranks = args->nProcs * args->nThreads * args->nGpus; + + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaSetDevice(args->gpus[i])); + int rank = ((args->proc * args->nThreads + args->thread) * args->nGpus + i); + 
CUDACHECK(cudaMemset(args->recvbuffs[i], 0, args->expectedBytes)); + void* data = in_place ? args->recvbuffs[i] : args->sendbuffs[i]; + TESTCHECK(InitData(data, sendcount, 0, type, ncclSum, 33 * rep + rank, 1, 0)); + for(int j = 0; j < nranks; j++) + { + size_t partcount = sendcount / nranks; + TESTCHECK(InitData((char*) args->expected[i] + j * partcount * wordSize(type), + partcount, rank * partcount, type, ncclSum, 33 * rep + j, + 1, 0)); + } + CUDACHECK(cudaDeviceSynchronize()); + } + // We don't support in-place alltoall + args->reportErrors = in_place ? 0 : 1; + return testSuccess; +} + +void +AlltoAllGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, + int nranks) +{ + double baseBw = (double) (count * nranks * typesize) / 1.0E9 / sec; + + *algBw = baseBw; + double factor = ((double) (nranks - 1)) / ((double) (nranks)); + *busBw = baseBw * factor; +} + +testResult_t +AlltoAllRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, + ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) +{ + NCCLCHECK(ncclAllToAll(sendbuff, recvbuff, count, type, comm, stream)); + return testSuccess; +} + +struct testColl alltoAllTest = { "AlltoAll", AlltoAllGetCollByteCount, AlltoAllInitData, + AlltoAllGetBw, AlltoAllRunColl }; + +void +AlltoAllGetBuffSize(size_t* sendcount, size_t* recvcount, size_t count, int nranks) +{ + size_t paramcount, sendInplaceOffset, recvInplaceOffset; + AlltoAllGetCollByteCount(sendcount, recvcount, ¶mcount, &sendInplaceOffset, + &recvInplaceOffset, count, /*eltSize=*/1, nranks); +} + +testResult_t +AlltoAllRunTest(struct threadArgs* args, int root, ncclDataType_t type, + const char* typeName, ncclRedOp_t op, const char* opName) +{ + args->collTest = &alltoAllTest; + ncclDataType_t* run_types; + const char** run_typenames; + int type_count; + + if((int) type != -1) + { + type_count = 1; + run_types = &type; + run_typenames = &typeName; + } + else + { + type_count = test_typenum; + run_types = 
test_types; + run_typenames = test_typenames; + } + + for(int i = 0; i < type_count; i++) + { + TESTCHECK( + TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t) 0, "none", -1)); + } + return testSuccess; +} + +struct testEngine ncclTestEngine = { AlltoAllGetBuffSize, AlltoAllRunTest }; diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/alltoallv.cpp b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/alltoallv.cpp new file mode 100644 index 0000000000..78c90775b9 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/alltoallv.cpp @@ -0,0 +1,218 @@ +/************************************************************************* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. + * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "common.h" +#include "cuda_runtime.h" + +#define USE_RCCL_GATHER_SCATTER + +void +AlltoAllvGetCollByteCount(size_t* sendcount, size_t* recvcount, size_t* paramcount, + size_t* sendInplaceOffset, size_t* recvInplaceOffset, + size_t count, size_t eltSize, int nranks) +{ + if(count < nranks * nranks / 2) + { + *sendcount = 0; + *recvcount = 0; + *sendInplaceOffset = 0; + *recvInplaceOffset = 0; + *paramcount = 0; + } + else + { + *paramcount = (count / nranks) & -(16 / eltSize); + *sendcount = nranks * (*paramcount); + *recvcount = *sendcount; + *sendInplaceOffset = 0; + *recvInplaceOffset = 0; + } +} + +testResult_t +AlltoAllvInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int rep, int in_place) +{ + size_t sendcount = args->sendBytes / wordSize(type); + size_t recvcount = args->expectedBytes / wordSize(type); + int nranks = args->nProcs * args->nThreads * args->nGpus; + + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaSetDevice(args->gpus[i])); + int rank = 
((args->proc * args->nThreads + args->thread) * args->nGpus + i); + CUDACHECK(cudaMemset(args->recvbuffs[i], 0, args->expectedBytes)); + void* data = in_place ? args->recvbuffs[i] : args->sendbuffs[i]; + TESTCHECK(InitData(data, sendcount, 0, type, ncclSum, 33 * rep + rank, 1, 0)); + +#if 0 + int *dataHost = (int *)malloc(args->sendBytes); + cudaMemcpy(dataHost, data, args->sendBytes, cudaMemcpyDeviceToHost); + printf(" Rank [%d] Original: ", rank); + for(int j=0; jexpected[i]) + rdisp * wordSize(type), + rcount, sdisp, type, ncclSum, 33 * rep + j, 1, 0)); + rdisp += rcount; + } + CUDACHECK(cudaDeviceSynchronize()); + } + // We don't support in-place alltoall + args->reportErrors = in_place ? 0 : 1; + return testSuccess; +} + +void +AlltoAllvGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, + int nranks) +{ + double baseBw = (double) (count * nranks * typesize) / 1.0E9 / sec; + + *algBw = baseBw; + double factor = ((double) (nranks - 1)) / ((double) (nranks)); + *busBw = baseBw * factor; +} + +testResult_t +AlltoAllvRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, + ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) +{ + int nranks; + NCCLCHECK(ncclCommCount(comm, &nranks)); + int rank; + NCCLCHECK(ncclCommUserRank(comm, &rank)); + + if(count == 0) return testSuccess; + + size_t *sendcounts, *recvcounts, *sdispls, *rdispls; + sendcounts = (size_t*) malloc(nranks * nranks * sizeof(size_t)); + recvcounts = (size_t*) malloc(nranks * nranks * sizeof(size_t)); + sdispls = (size_t*) malloc(nranks * nranks * sizeof(size_t)); + rdispls = (size_t*) malloc(nranks * nranks * sizeof(size_t)); + if(sendcounts == nullptr || recvcounts == nullptr || sdispls == nullptr || + rdispls == nullptr) + { + printf("failed to allocate buffers for alltoallv\n"); + return testNcclError; + } + + size_t disp = 0; + size_t chunksize = count * 2 / nranks; + for(int i = 0; i < nranks; i++) + { + size_t scount = ((i + rank) % nranks) 
* chunksize; + if((i + rank) % nranks == 0) + scount += (count * nranks - chunksize * (nranks - 1) * nranks / 2); + sendcounts[i + rank * nranks] = recvcounts[i + rank * nranks] = scount; + sdispls[i + rank * nranks] = rdispls[i + rank * nranks] = disp; + disp += scount; + // printf("%d->%d: sendcounts/recvcounts %lx sdispls/rdispls %lx\n", rank, i, + // sendcounts[i+rank*nranks]*wordSize(type), + // sdispls[i+rank*nranks]*wordSize(type)); + } + +#if NCCL_MAJOR < 2 || NCCL_MINOR < 7 + printf( + "NCCL 2.7 or later is needed for alltoallv. This test was compiled with %d.%d.\n", + NCCL_MAJOR, NCCL_MINOR); + return testNcclError; +#else +# if defined(RCCL_ALLTOALLV) && defined(USE_RCCL_GATHER_SCATTER) + NCCLCHECK(ncclAllToAllv(sendbuff, sendcounts + rank * nranks, sdispls + rank * nranks, + recvbuff, recvcounts + rank * nranks, rdispls + rank * nranks, + type, comm, stream)); +# else + NCCLCHECK(ncclGroupStart()); + for(int r = 0; r < nranks; r++) + { + if(sendcounts[r + rank * nranks] != 0) + { + NCCLCHECK( + ncclSend(((char*) sendbuff) + sdispls[r + rank * nranks] * wordSize(type), + sendcounts[r + rank * nranks], type, r, comm, stream)); + } + if(recvcounts[r + rank * nranks] != 0) + { + NCCLCHECK( + ncclRecv(((char*) recvbuff) + rdispls[r + rank * nranks] * wordSize(type), + recvcounts[r + rank * nranks], type, r, comm, stream)); + } + } + NCCLCHECK(ncclGroupEnd()); +# endif +#endif + free(sendcounts); + free(recvcounts); + free(sdispls); + free(rdispls); + return testSuccess; +} + +struct testColl alltoAllTest = { "AlltoAllv", AlltoAllvGetCollByteCount, + AlltoAllvInitData, AlltoAllvGetBw, AlltoAllvRunColl }; + +void +AlltoAllvGetBuffSize(size_t* sendcount, size_t* recvcount, size_t count, int nranks) +{ + size_t paramcount, sendInplaceOffset, recvInplaceOffset; + AlltoAllvGetCollByteCount(sendcount, recvcount, ¶mcount, &sendInplaceOffset, + &recvInplaceOffset, count, /*eltSize=*/1, nranks); +} + +testResult_t +AlltoAllvRunTest(struct threadArgs* args, int root, 
ncclDataType_t type, + const char* typeName, ncclRedOp_t op, const char* opName) +{ + args->collTest = &alltoAllTest; + ncclDataType_t* run_types; + const char** run_typenames; + int type_count; + + if((int) type != -1) + { + type_count = 1; + run_types = &type; + run_typenames = &typeName; + } + else + { + type_count = ncclNumTypes; + run_types = test_types; + run_typenames = test_typenames; + } + + for(int i = 0; i < type_count; i++) + { + TESTCHECK( + TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t) 0, "", -1)); + } + return testSuccess; +} + +struct testEngine ncclTestEngine = { AlltoAllvGetBuffSize, AlltoAllvRunTest }; diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/broadcast.cpp b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/broadcast.cpp new file mode 100644 index 0000000000..b98c3a8fd7 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/broadcast.cpp @@ -0,0 +1,131 @@ +/************************************************************************* + * Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "common.h" +#include "cuda_runtime.h" + +void +BroadcastGetCollByteCount(size_t* sendcount, size_t* recvcount, size_t* paramcount, + size_t* sendInplaceOffset, size_t* recvInplaceOffset, + size_t count, size_t eltSize, int nranks) +{ + *sendcount = count; + *recvcount = count; + *sendInplaceOffset = 0; + *recvInplaceOffset = 0; + *paramcount = *sendcount; +} + +testResult_t +BroadcastInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int rep, int in_place) +{ + size_t sendcount = args->sendBytes / wordSize(type); + size_t recvcount = args->expectedBytes / wordSize(type); + + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaSetDevice(args->gpus[i])); + int rank = ((args->proc * args->nThreads + args->thread) * args->nGpus + i); + CUDACHECK(cudaMemset(args->recvbuffs[i], 0, args->expectedBytes)); + void* data = in_place ? 
args->recvbuffs[i] : args->sendbuffs[i]; + if(rank == root) + TESTCHECK(InitData(data, sendcount, 0, type, ncclSum, rep, 1, 0)); + TESTCHECK(InitData(args->expected[i], recvcount, 0, type, ncclSum, rep, 1, 0)); + CUDACHECK(cudaDeviceSynchronize()); + } + return testSuccess; +} + +void +BroadcastGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, + int nranks) +{ + double baseBw = (double) (count * typesize) / 1.0E9 / sec; + + *algBw = baseBw; + double factor = 1; + *busBw = baseBw * factor; +} + +testResult_t +BroadcastRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, + ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) +{ + int rank; + NCCLCHECK(ncclCommUserRank(comm, &rank)); +#if NCCL_MAJOR >= 2 && NCCL_MINOR >= 2 + NCCLCHECK(ncclBroadcast(sendbuff, recvbuff, count, type, root, comm, stream)); +#else + if(rank == root) + { + NCCLCHECK(ncclBcast(sendbuff, count, type, root, comm, stream)); + } + else + { + NCCLCHECK(ncclBcast(recvbuff, count, type, root, comm, stream)); + } +#endif + return testSuccess; +} + +struct testColl broadcastTest = { "Broadcast", BroadcastGetCollByteCount, + BroadcastInitData, BroadcastGetBw, BroadcastRunColl }; + +void +BroadcastGetBuffSize(size_t* sendcount, size_t* recvcount, size_t count, int nranks) +{ + size_t paramcount, sendInplaceOffset, recvInplaceOffset; + BroadcastGetCollByteCount(sendcount, recvcount, ¶mcount, &sendInplaceOffset, + &recvInplaceOffset, count, /*eltSize=*/1, nranks); +} + +testResult_t +BroadcastRunTest(struct threadArgs* args, int root, ncclDataType_t type, + const char* typeName, ncclRedOp_t op, const char* opName) +{ + args->collTest = &broadcastTest; + ncclDataType_t* run_types; + const char** run_typenames; + int type_count; + int begin_root, end_root; + + if((int) type != -1) + { + type_count = 1; + run_types = &type; + run_typenames = &typeName; + } + else + { + type_count = test_typenum; + run_types = test_types; + run_typenames = 
test_typenames; + } + + if(root != -1) + { + begin_root = end_root = root; + } + else + { + begin_root = 0; + end_root = args->nProcs * args->nThreads * args->nGpus - 1; + } + + for(int i = 0; i < type_count; i++) + { + for(int j = begin_root; j <= end_root; j++) + { + TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t) 0, + "none", j)); + } + } + return testSuccess; +} + +struct testEngine ncclTestEngine = { BroadcastGetBuffSize, BroadcastRunTest }; diff --git a/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/common.cpp b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/common.cpp new file mode 100644 index 0000000000..1e7658b975 --- /dev/null +++ b/projects/rocprofiler-systems/examples/rccl/rccl-tests/src/common.cpp @@ -0,0 +1,1987 @@ + +/************************************************************************* + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License. 
+ * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "common.h" +#include "cuda.h" +#include "cuda_runtime.h" +#include "rccl_float8.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// #define DEBUG_PRINT + +#include "git_version.h" +#include "verifiable.h" + +int test_ncclVersion = 0; // init'd with ncclGetVersion() +int32_t gpu_block3; +size_t cache_bytes = 192 * 1024 * 1024; // Use 192MB + +#if NCCL_MAJOR >= 2 +ncclDataType_t test_types[ncclNumTypes] = { ncclInt8, + ncclUint8, + ncclInt32, + ncclUint32, + ncclInt64, + ncclUint64, + ncclHalf, + ncclFloat, + ncclDouble +# if RCCL_BFLOAT16 == 1 + , + ncclBfloat16 +# endif +# if RCCL_FLOAT8 == 1 + , + ncclFp8E4M3, + ncclFp8E5M2 +# endif +}; +const char* test_typenames[ncclNumTypes] = { "int8", + "uint8", + "int32", + "uint32", + "int64", + "uint64", + "half", + "float", + "double" +# if RCCL_BFLOAT16 == 1 + , + "bfloat16" +# endif +# if RCCL_FLOAT8 == 1 + , + "fp8_e4m3", + "fp8_e5m2" +# endif +}; +int test_typenum = -1; + +const char* test_opnames[] = { "sum", "prod", "max", "min", "avg", "mulsum" }; +ncclRedOp_t test_ops[] = { + ncclSum, + ncclProd, + ncclMax, + ncclMin +# if NCCL_VERSION_CODE >= NCCL_VERSION(2, 10, 0) + , + ncclAvg +# endif +# if NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) + , + ncclNumOps // stand in for ncclRedOpCreatePreMulSum() created on-demand +# endif +}; +int test_opnum = -1; +#else +ncclDataType_t test_types[ncclNumTypes] = { ncclChar, ncclInt, ncclHalf, ncclFloat, + ncclDouble, ncclInt64, ncclUint64 }; +const char* test_typenames[ncclNumTypes] = { "char", "int", "half", "float", + "double", "int64", "uint64" }; +int test_typenum = 7; +const char* test_opnames[] = { "sum", "prod", "max", "min" }; +ncclRedOp_t test_ops[] = { ncclSum, ncclProd, ncclMax, ncclMin }; +int test_opnum = 4; +#endif + +const char* test_memorytypes[nccl_NUM_MTYPES] = { 
"coarse", "fine", "host", "managed" }; + +// For libnccl's < 2.13 +extern "C" __attribute__((weak)) char const* +ncclGetLastError(ncclComm_t comm) +{ + return ""; +} + +int is_main_proc = 0; +thread_local int is_main_thread = 0; + +// Command line parameter defaults +static int nThreads = 1; +static int nGpus = 1; +static size_t minBytes = 32 * 1024 * 1024; +static size_t maxBytes = 32 * 1024 * 1024; +static size_t stepBytes = 1 * 1024 * 1024; +static size_t stepFactor = 1; +static int datacheck = 1; +static int warmup_iters = 5; +static int iters = 20; +static int agg_iters = 1; +static int run_cycles = 1; +static int ncclop = ncclSum; +static int nccltype = ncclFloat; +static int ncclroot = 0; +static int parallel_init = 0; +static int blocking_coll = 0; +static int memorytype = 0; +static uint32_t cumask[4]; +static int streamnull = 0; +static int timeout = 0; +static int cudaGraphLaunches = 0; +std::string output_file; +std::string output_format; +static int report_cputime = 0; +// Report average iteration time: (0=RANK0,1=AVG,2=MIN,3=MAX) +static int average = 1; +static int numDevices = 1; +static int delay_inout_place = 0; +static int enable_out_of_place = 1; +static int enable_in_place = 1; +static int enable_cache_flush = 0; +static int enable_rotating_tensor = 0; +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) +static int local_register = 0; +#endif + +Reporter::Reporter(std::string fileName, std::string outputFormat) +: _outputFormat(outputFormat) +{ + if(!fileName.empty()) + { + if(isMainThread()) + { + _out = std::ofstream(fileName, std::ios_base::out); + _outputValid = true; + if(_outputFormat == "csv") + { + _out << "numCycle, "; + _out << "collective, "; +#ifdef MPI_SUPPORT + _out << "ranks, rankspernode, gpusperrank, "; +#else + _out << "gpus, "; +#endif + _out << "size, type, redop, inplace, time, algbw, busbw, #wrong\n"; + } + } + } +} + +void +Reporter::setParameters(const size_t numCycle, const char* name, const char* typeName, + const char* 
opName) +{ + if(!isMainThread() || !_outputValid) return; + + _numCycle = numCycle; + _collectiveName = name; + _typeName = typeName; + _opName = opName; +} + +void +Reporter::addResult(int gpusPerRank, int ranksPerNode, int totalRanks, size_t numBytes, + int inPlace, double timeUsec, double algBw, double busBw, + int64_t wrongElts) +{ + if(!isMainThread() || !_outputValid) return; + + std::vector> outputValuesKeys; + std::string wrongEltsStr = (wrongElts == -1) ? "N/A" : std::to_string(wrongElts); + int nodes = totalRanks / ranksPerNode; + + outputValuesKeys.push_back(makeValueKeyPair(_numCycle, "numCycle")); + outputValuesKeys.push_back(makeValueKeyPair(_collectiveName, "name")); +#ifdef MPI_SUPPORT + outputValuesKeys.push_back(makeValueKeyPair(nodes, "nodes")); + outputValuesKeys.push_back(makeValueKeyPair(totalRanks, "ranks")); + outputValuesKeys.push_back(makeValueKeyPair(ranksPerNode, "ranksPerNode")); + outputValuesKeys.push_back(makeValueKeyPair(gpusPerRank, "gpusPerRank")); +#else + outputValuesKeys.push_back(makeValueKeyPair(gpusPerRank, "gpus")); +#endif + outputValuesKeys.push_back(makeValueKeyPair(numBytes, "size")); + outputValuesKeys.push_back(makeValueKeyPair(_typeName, "type")); + outputValuesKeys.push_back(makeValueKeyPair(_opName, "redop")); + outputValuesKeys.push_back(makeValueKeyPair(inPlace, "inPlace")); + outputValuesKeys.push_back(makeValueKeyPair(timeUsec, "time")); + outputValuesKeys.push_back(makeValueKeyPair(algBw, "algBw")); + outputValuesKeys.push_back(makeValueKeyPair(busBw, "busBw")); + outputValuesKeys.push_back(makeValueKeyPair(wrongEltsStr, "wrong")); + + for(auto iter = outputValuesKeys.begin(); iter != outputValuesKeys.end(); ++iter) + { + if(_outputFormat == "csv") + { + _out << iter->first; + if(std::next(iter) != outputValuesKeys.end()) + { + _out << ", "; + } + } + else + { // json + if(iter == outputValuesKeys.begin()) + { + _out << "{"; + } + _out << "\"" << iter->second << "\":" << iter->first; + if(std::next(iter) != 
outputValuesKeys.end()) + { + _out << ", "; + } + else + { + _out << "}"; + } + } + } + _out << std::endl; +} + +bool +Reporter::isMainThread() +{ + return is_main_thread == 1; +} + +#define NUM_BLOCKS 32 + +#ifndef CHECK_HIP_ERROR +# define CHECK_HIP_ERROR(error) \ + if(error != hipSuccess) \ + { \ + fprintf(stderr, "Hip error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), \ + error, __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } +#endif + +extern "C" __global__ void +flush_icache() +{ + asm __volatile__("s_icache_inv \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" + "s_nop 0 \n\t" :: + :); +} + +static double +parsesize(const char* value) +{ + long long int units; + double size; + char size_lit[2]; + + int count = sscanf(value, "%lf %1s", &size, size_lit); + + switch(count) + { + case 2: + switch(size_lit[0]) + { + case 'G': + case 'g': units = 1024 * 1024 * 1024; break; + case 'M': + case 'm': units = 1024 * 1024; break; + case 'K': + case 'k': units = 1024; break; + default: return -1.0; + }; + break; + case 1: units = 1; break; + default: return -1.0; + } + + return size * units; +} + +static bool +minReqVersion(int rmajor, int rminor, int rpatch) +{ + int version; + int major, minor, patch, rem; + ncclGetVersion(&version); + + if(version < 10000) + { + major = version / 1000; + rem = version % 1000; + minor = rem / 100; + patch = rem % 100; + } + else + { + major = version / 10000; + rem = version % 10000; + minor = rem / 100; + patch = rem % 100; + } + + if(major < rmajor) + return false; + else if(major > rmajor) + return true; + + // major == rmajor + if(minor < rminor) + return false; + else if(minor > rminor) + return true; + + // major == rmajor && minor == rminor + if(patch < rpatch) return false; + + return true; +} + +testResult_t 
+CheckDelta(void* results, void* expected, size_t count, size_t offset, + ncclDataType_t type, ncclRedOp_t op, uint64_t seed, int nranks, + int64_t* wrongEltN) +{ + ncclVerifiableVerify(results, expected, count, (int) type, (int) op, nranks, seed, + offset, wrongEltN, cudaStreamDefault); + CUDACHECK(cudaDeviceSynchronize()); + return testSuccess; +} + +testResult_t +InitDataReduce(void* data, const size_t count, const size_t offset, ncclDataType_t type, + ncclRedOp_t op, uint64_t seed, int nranks) +{ + ncclVerifiablePrepareExpected(data, count, (int) type, (int) op, nranks, seed, offset, + cudaStreamDefault); + return testSuccess; +} + +testResult_t +InitData(void* data, const size_t count, size_t offset, ncclDataType_t type, + ncclRedOp_t op, uint64_t seed, int nranks, int rank) +{ + ncclVerifiablePrepareInput(data, count, (int) type, (int) op, nranks, rank, seed, + offset, cudaStreamDefault); + return testSuccess; +} + +void +Barrier(struct threadArgs* args) +{ + thread_local int epoch = 0; + static pthread_mutex_t lock[2] = { PTHREAD_MUTEX_INITIALIZER, + PTHREAD_MUTEX_INITIALIZER }; + static pthread_cond_t cond[2] = { PTHREAD_COND_INITIALIZER, + PTHREAD_COND_INITIALIZER }; + static int counter[2] = { 0, 0 }; + + pthread_mutex_lock(&lock[epoch]); + if(++counter[epoch] == args->nThreads) pthread_cond_broadcast(&cond[epoch]); + + if(args->thread + 1 == args->nThreads) + { + while(counter[epoch] != args->nThreads) + pthread_cond_wait(&cond[epoch], &lock[epoch]); +#ifdef MPI_SUPPORT + MPI_Barrier(MPI_COMM_WORLD); +#endif + counter[epoch] = 0; + pthread_cond_broadcast(&cond[epoch]); + } + else + { + while(counter[epoch] != 0) + pthread_cond_wait(&cond[epoch], &lock[epoch]); + } + pthread_mutex_unlock(&lock[epoch]); + epoch ^= 1; +} + +// Inter-thread/process barrier+allreduce. The quality of the return value +// for average=0 (which means broadcast from rank=0) is dubious. 
The returned +// value will actually be the result of process-local broadcast from the local thread=0. +template +void +Allreduce(struct threadArgs* args, T* value, int average) +{ + thread_local int epoch = 0; + static pthread_mutex_t lock[2] = { PTHREAD_MUTEX_INITIALIZER, + PTHREAD_MUTEX_INITIALIZER }; + static pthread_cond_t cond[2] = { PTHREAD_COND_INITIALIZER, + PTHREAD_COND_INITIALIZER }; + static T accumulator[2]; + static int counter[2] = { 0, 0 }; + + pthread_mutex_lock(&lock[epoch]); + if(counter[epoch] == 0) + { + if(average != 0 || args->thread == 0) accumulator[epoch] = *value; + } + else + { + switch(average) + { + case /*r0*/ 0: + if(args->thread == 0) accumulator[epoch] = *value; + break; + case /*avg*/ 1: accumulator[epoch] += *value; break; + case /*min*/ 2: + accumulator[epoch] = std::min(accumulator[epoch], *value); + break; + case /*max*/ 3: + accumulator[epoch] = std::max(accumulator[epoch], *value); + break; + case /*sum*/ 4: accumulator[epoch] += *value; break; + } + } + + if(++counter[epoch] == args->nThreads) pthread_cond_broadcast(&cond[epoch]); + + if(args->thread + 1 == args->nThreads) + { + while(counter[epoch] != args->nThreads) + pthread_cond_wait(&cond[epoch], &lock[epoch]); + +#ifdef MPI_SUPPORT + if(average != 0) + { + static_assert(std::is_same::value || + std::is_same::value, + "Allreduce only for T in {long long, double}"); + MPI_Datatype ty = std::is_same::value ? MPI_LONG_LONG + : std::is_same::value ? MPI_DOUBLE + : MPI_Datatype(); + MPI_Op op = average == 1 ? MPI_SUM + : average == 2 ? MPI_MIN + : average == 3 ? MPI_MAX + : average == 4 ? 
MPI_SUM + : MPI_Op(); + MPI_Allreduce(MPI_IN_PLACE, (void*) &accumulator[epoch], 1, ty, op, + MPI_COMM_WORLD); + } +#endif + + if(average == 1) accumulator[epoch] /= args->totalProcs * args->nThreads; + counter[epoch] = 0; + pthread_cond_broadcast(&cond[epoch]); + } + else + { + while(counter[epoch] != 0) + pthread_cond_wait(&cond[epoch], &lock[epoch]); + } + pthread_mutex_unlock(&lock[epoch]); + + *value = accumulator[epoch]; + epoch ^= 1; +} + +testResult_t +CheckData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int in_place, int64_t* wrongElts) +{ + int nranks = args->nProcs * args->nGpus * args->nThreads; + size_t count = args->expectedBytes / wordSize(type); + + int64_t* wrongPerGpu = nullptr; + CUDACHECK(hipHostMalloc((void**) &wrongPerGpu, args->nGpus * sizeof(int64_t), + cudaHostAllocMapped)); + + for(int i = 0; i < args->nGpus; i++) + { + int rank = ((args->proc * args->nThreads + args->thread) * args->nGpus + i); + CUDACHECK(cudaSetDevice(args->gpus[i])); + void* data = in_place ? 
((void*) ((uintptr_t) args->recvbuffs[i] + + args->recvInplaceOffset * rank)) + : args->recvbuffs[i]; + + TESTCHECK(CheckDelta(data, args->expected[i], count, 0, type, op, 0, nranks, + wrongPerGpu + i)); + +#if 1 && DEBUG_PRINT + if(args->reportErrors && wrongPerGpu[i] != 0) + { + printf("rank=%d #wrong=%d\n", rank, (int) wrongPerGpu[i]); + char* expectedHost = (char*) malloc(args->expectedBytes); + char* dataHost = (char*) malloc(args->expectedBytes); + int eltsz = wordSize(type); + cudaMemcpy(expectedHost, args->expected[i], args->expectedBytes, + cudaMemcpyDeviceToHost); + cudaMemcpy(dataHost, data, args->expectedBytes, cudaMemcpyDeviceToHost); + + for(int j = 0; j < args->expectedBytes / eltsz; j++) + { + unsigned long long want, got; + want = 0; + memcpy(&want, expectedHost + j * eltsz, eltsz); + got = 0; + memcpy(&got, dataHost + j * eltsz, eltsz); + if(want != got) + { + printf(" rank=%d elt[%d]: want=0x%llx got=0x%llx\n", rank, j, want, + got); + } + } + free(expectedHost); + free(dataHost); + } +#endif + } + + *wrongElts = 0; + for(int i = 0; i < args->nGpus; i++) + *wrongElts += wrongPerGpu[i]; + cudaFreeHost(wrongPerGpu); + + if(args->reportErrors && *wrongElts) args->errors[0]++; + return testSuccess; +} + +testResult_t +testStreamSynchronize(int ngpus, cudaStream_t* streams, ncclComm_t* comms) +{ + cudaError_t cudaErr; + int remaining = ngpus; + int* done = (int*) malloc(sizeof(int) * ngpus); + memset(done, 0, sizeof(int) * ngpus); + timer tim; + + while(remaining) + { + int idle = 1; + for(int i = 0; i < ngpus; i++) + { + if(done[i]) continue; + + cudaErr = cudaStreamQuery(streams[i]); + if(cudaErr == cudaSuccess) + { + done[i] = 1; + remaining--; + idle = 0; + continue; + } + + if(cudaErr != cudaErrorNotReady) CUDACHECK(cudaErr); + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 4, 0) + if(test_ncclVersion >= NCCL_VERSION(2, 4, 0) && comms) + { + ncclResult_t ncclAsyncErr; + NCCLCHECK(ncclCommGetAsyncError(comms[i], &ncclAsyncErr)); + if(ncclAsyncErr != 
ncclSuccess) + { + // An asynchronous error happened. Stop the operation and destroy + // the communicator + for(int i = 0; i < ngpus; i++) + NCCLCHECK(ncclCommAbort(comms[i])); + // Abort the perf test + NCCLCHECK(ncclAsyncErr); + } + } + double delta = tim.elapsed(); + if(delta > timeout && timeout > 0) + { + for(int i = 0; i < ngpus; i++) + NCCLCHECK(ncclCommAbort(comms[i])); + char hostname[1024]; + getHostName(hostname, 1024); + printf("%s: Test timeout (%ds) %s:%d\n", hostname, timeout, __FILE__, + __LINE__); + free(done); + return testTimeout; + } +#endif + } + + // We might want to let other threads (including NCCL threads) use the CPU. + if(idle) sched_yield(); + } + free(done); + return testSuccess; +} + +testResult_t +startColl(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t opIndex, int root, + int in_place, int iter) +{ + size_t count = args->nbytes / wordSize(type); + + // Try to change offset for each iteration so that we avoid cache effects and catch + // race conditions in ptrExchange + size_t shift = 0; + if(enable_rotating_tensor) + { + shift = cache_bytes * (iter % 2); + } + else + { + size_t totalnbytes = std::max(args->sendBytes, args->expectedBytes); + size_t steps = totalnbytes ? 
args->maxbytes / totalnbytes : 1; + shift = totalnbytes * (iter % steps); + } + + if(args->nGpus > 1) NCCLCHECK(ncclGroupStart()); + for(int i = 0; i < args->nGpus; i++) + { +#ifndef NCCL_MAJOR + CUDACHECK(cudaSetDevice(args->gpus[i])); +#endif + int rank = ((args->proc * args->nThreads + args->thread) * args->nGpus + i); + char* recvBuff = ((char*) args->recvbuffs[i]) + shift; + char* sendBuff = ((char*) args->sendbuffs[i]) + shift; + ncclRedOp_t op; + + if(opIndex < ncclNumOps) + { + op = opIndex; + } +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) + else + { + union + { + int8_t i8; + uint8_t u8; + int32_t i32; + uint32_t u32; + int64_t i64; + uint64_t u64; + half f16; + float f32; + double f64; +# if defined(RCCL_BFLOAT16) + hip_bfloat16 bf16; +# endif +# if defined(RCCL_FLOAT8) + rccl_float8 fp8_e4m3; + rccl_bfloat8 fp8_e5m2; +# endif + }; + switch(type) + { + case ncclInt8: i8 = ncclVerifiablePremulScalar(rank); break; + case ncclUint8: u8 = ncclVerifiablePremulScalar(rank); break; + case ncclInt32: i32 = ncclVerifiablePremulScalar(rank); break; + case ncclUint32: u32 = ncclVerifiablePremulScalar(rank); break; + case ncclInt64: i64 = ncclVerifiablePremulScalar(rank); break; + case ncclUint64: u64 = ncclVerifiablePremulScalar(rank); break; + case ncclFloat16: f16 = ncclVerifiablePremulScalar(rank); break; + case ncclFloat32: f32 = ncclVerifiablePremulScalar(rank); break; + case ncclFloat64: f64 = ncclVerifiablePremulScalar(rank); break; +# if defined(RCCL_BFLOAT16) + case ncclBfloat16: + bf16 = ncclVerifiablePremulScalar(rank); + break; +# endif +# if defined(RCCL_FLOAT8) + case ncclFp8E4M3: + fp8_e4m3 = ncclVerifiablePremulScalar(rank); + break; + case ncclFp8E5M2: + fp8_e5m2 = ncclVerifiablePremulScalar(rank); + break; +# endif + case ncclNumTypes: break; + } + NCCLCHECK(ncclRedOpCreatePreMulSum(&op, &u64, type, ncclScalarHostImmediate, + args->comms[i])); + } +#endif + + if(enable_cache_flush > 0 && ((iter % enable_cache_flush) == 0)) + { + 
hipLaunchKernelGGL(flush_icache, dim3(gpu_block3), dim3(64), 0, + args->streams[i]); + } + + TESTCHECK(args->collTest->runColl( + (void*) (in_place ? recvBuff + args->sendInplaceOffset * rank : sendBuff), + (void*) (in_place ? recvBuff + args->recvInplaceOffset * rank : recvBuff), + count, type, op, root, args->comms[i], args->streams[i])); + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) + if(opIndex >= ncclNumOps) + { + NCCLCHECK(ncclRedOpDestroy(op, args->comms[i])); + } +#endif + } + if(args->nGpus > 1) NCCLCHECK(ncclGroupEnd()); + + if(blocking_coll) + { + // Complete op before returning + TESTCHECK(testStreamSynchronize(args->nGpus, args->streams, args->comms)); + } + if(blocking_coll) Barrier(args); + return testSuccess; +} + +testResult_t +completeColl(struct threadArgs* args) +{ + if(blocking_coll) return testSuccess; + + TESTCHECK(testStreamSynchronize(args->nGpus, args->streams, args->comms)); + return testSuccess; +} + +testResult_t +BenchTime(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, + int in_place) +{ + size_t count = args->nbytes / wordSize(type); + if(datacheck) + { + // Initialize sendbuffs, recvbuffs and expected + TESTCHECK(args->collTest->initData(args, type, op, root, 99, in_place)); + } + + if(warmup_iters) + { + // Sync + TESTCHECK(startColl(args, type, op, root, in_place, 0)); + TESTCHECK(completeColl(args)); + } + + Barrier(args); + +#if HIP_VERSION >= 50221310 + std::vector graphs(args->nGpus); + std::vector graphExec(args->nGpus); + if(cudaGraphLaunches >= 1) + { + // Begin cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + // Thread local mdoe is needed for: + // - Multi-thread mode: where graph capture and instantiation can happen + // concurrently across threads + // - P2P pre-connect: when there is no warm-up, P2P pre-connect is done during + // graph capture. 
+ // Since pre-connect calls cudaMalloc, we cannot use global capture mode + CUDACHECK(cudaStreamBeginCapture(args->streams[i], + cudaStreamCaptureModeThreadLocal)); + } + } +#endif + + // Performance Benchmark + timer tim; + for(int iter = 0; iter < iters; iter++) + { + if(agg_iters > 1) NCCLCHECK(ncclGroupStart()); + for(int aiter = 0; aiter < agg_iters; aiter++) + { + TESTCHECK( + startColl(args, type, op, root, in_place, iter * agg_iters + aiter)); + } + if(agg_iters > 1) NCCLCHECK(ncclGroupEnd()); + } + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // End cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaStreamEndCapture(args->streams[i], graphs.data() + i)); + } + // Instantiate cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK( + cudaGraphInstantiate(graphExec.data() + i, graphs[i], NULL, NULL, 0)); + } + // Resync CPU, restart timing, launch cuda graph + Barrier(args); + tim.reset(); + for(int l = 0; l < cudaGraphLaunches; l++) + { + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphLaunch(graphExec[i], args->streams[i])); + } + } + } +#endif + + double cputimeSec = tim.elapsed() / (iters * agg_iters); + TESTCHECK(completeColl(args)); + + double deltaSec = tim.elapsed(); + deltaSec = deltaSec / (iters * agg_iters); + if(cudaGraphLaunches >= 1) deltaSec = deltaSec / cudaGraphLaunches; + Allreduce(args, &deltaSec, average); + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // destroy cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphExecDestroy(graphExec[i])); + CUDACHECK(cudaGraphDestroy(graphs[i])); + } + } +#endif + + double algBw, busBw; + args->collTest->getBw(count, wordSize(type), deltaSec, &algBw, &busBw, + args->nProcs * args->nThreads * args->nGpus); + + Barrier(args); + + int64_t wrongElts = 0; + static __thread int rep = 0; + rep++; + for(int c = 0; c < datacheck; c++) + { + // Initialize sendbuffs, recvbuffs and expected + 
TESTCHECK(args->collTest->initData(args, type, op, root, rep, in_place)); + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // Begin cuda graph capture for data check + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaStreamBeginCapture(args->streams[i], + args->nThreads > 1 + ? cudaStreamCaptureModeThreadLocal + : cudaStreamCaptureModeGlobal)); + } + } +#endif + + // test validation in single itertion, should ideally be included into the + // multi-iteration run + TESTCHECK(startColl(args, type, op, root, in_place, 0)); + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // End cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaStreamEndCapture(args->streams[i], graphs.data() + i)); + } + // Instantiate cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK( + cudaGraphInstantiate(graphExec.data() + i, graphs[i], NULL, NULL, 0)); + } + // Launch cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphLaunch(graphExec[i], args->streams[i])); + } + } +#endif + + TESTCHECK(completeColl(args)); + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // destroy cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphExecDestroy(graphExec[i])); + CUDACHECK(cudaGraphDestroy(graphs[i])); + } + } +#endif + + TESTCHECK(CheckData(args, type, op, root, in_place, &wrongElts)); + + // aggregate delta from all threads and procs + long long wrongElts1 = wrongElts; + // if (wrongElts) fprintf(stderr, "\nERROR: Data corruption : rank %d size %ld + // wrongElts %ld\n", args->proc, args->expectedBytes, wrongElts); + Allreduce(args, &wrongElts1, /*sum*/ 4); + wrongElts = wrongElts1; + if(wrongElts) break; + } + + double timeUsec = (report_cputime ? 
cputimeSec : deltaSec) * 1.0E6; + char timeStr[100]; + if(timeUsec >= 10000.0) + { + sprintf(timeStr, "%7.0f", timeUsec); + } + else if(timeUsec >= 100.0) + { + sprintf(timeStr, "%7.1f", timeUsec); + } + else + { + sprintf(timeStr, "%7.2f", timeUsec); + } + if(args->reportErrors) + { + PRINT(" %7s %6.2f %6.2f %5g", timeStr, algBw, busBw, (double) wrongElts); + } + else + { + PRINT(" %7s %6.2f %6.2f %5s", timeStr, algBw, busBw, "N/A"); + } + + auto largestMessageSize = std::max(args->sendBytes, args->expectedBytes); + if(args->reporter) + { + if(args->reportErrors) + { + args->reporter->addResult((args->nThreads * args->nGpus), args->nProcs, + args->totalProcs, largestMessageSize, in_place, + timeUsec, algBw, busBw, wrongElts); + } + else + { + args->reporter->addResult((args->nThreads * args->nGpus), args->nProcs, + args->totalProcs, largestMessageSize, in_place, + timeUsec, algBw, busBw); + } + } + + args->bw[0] += busBw; + args->bw_count[0]++; + return testSuccess; +} + +void +setupArgs(size_t size, ncclDataType_t type, struct threadArgs* args) +{ + int nranks = args->nProcs * args->nGpus * args->nThreads; + size_t count, sendCount, recvCount, paramCount, sendInplaceOffset, recvInplaceOffset; + + count = size / wordSize(type); + args->collTest->getCollByteCount(&sendCount, &recvCount, ¶mCount, + &sendInplaceOffset, &recvInplaceOffset, + (size_t) count, wordSize(type), (size_t) nranks); + + args->nbytes = paramCount * wordSize(type); + args->sendBytes = sendCount * wordSize(type); + args->expectedBytes = recvCount * wordSize(type); + args->sendInplaceOffset = sendInplaceOffset * wordSize(type); + args->recvInplaceOffset = recvInplaceOffset * wordSize(type); +} + +testResult_t +TimeTest(struct threadArgs* args, ncclDataType_t type, const char* typeName, + ncclRedOp_t op, const char* opName, int root) +{ + // Sync to avoid first-call timeout + Barrier(args); + + // Warm-up for large size + setupArgs(args->maxbytes, type, args); +#if HIP_VERSION >= 50221310 + 
std::vector graphs(args->nGpus); + std::vector graphExec(args->nGpus); + if(cudaGraphLaunches >= 1) + { + // Begin cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + // Thread local mode is needed for: + // - Multi-thread mode: where graph capture and instantiation can happen + // concurrently across threads + // - P2P pre-connect: when there is no warm-up, P2P pre-connect is done during + // graph capture. + // Since pre-connect calls cudaMalloc, we cannot use global capture mode + CUDACHECK(cudaStreamBeginCapture(args->streams[i], + cudaStreamCaptureModeThreadLocal)); + } + } +#endif + for(int iter = 0; iter < warmup_iters; iter++) + { + TESTCHECK(startColl(args, type, op, root, 0, iter)); + } + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // End cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaStreamEndCapture(args->streams[i], graphs.data() + i)); + } + // Instantiate cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK( + cudaGraphInstantiate(graphExec.data() + i, graphs[i], NULL, NULL, 0)); + } + // Resync CPU, restart timing, launch cuda graph + Barrier(args); + for(int l = 0; l < cudaGraphLaunches; l++) + { + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphLaunch(graphExec[i], args->streams[i])); + } + } + } +#endif + + TESTCHECK(completeColl(args)); + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // destroy cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphExecDestroy(graphExec[i])); + CUDACHECK(cudaGraphDestroy(graphs[i])); + } + } +#endif + + // Warm-up for small size + setupArgs(args->minbytes, type, args); +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // Begin cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + // Thread local mode is needed for: + // - Multi-thread mode: where graph capture and instantiation can happen + // concurrently across threads + // - P2P pre-connect: when there is no 
warm-up, P2P pre-connect is done during + // graph capture. + // Since pre-connect calls cudaMalloc, we cannot use global capture mode + CUDACHECK(cudaStreamBeginCapture(args->streams[i], + cudaStreamCaptureModeThreadLocal)); + } + } +#endif + for(int iter = 0; iter < warmup_iters; iter++) + { + TESTCHECK(startColl(args, type, op, root, iter < warmup_iters / 2 ? 0 : 1, iter)); + } + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // End cuda graph capture + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaStreamEndCapture(args->streams[i], graphs.data() + i)); + } + // Instantiate cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK( + cudaGraphInstantiate(graphExec.data() + i, graphs[i], NULL, NULL, 0)); + } + // Resync CPU, restart timing, launch cuda graph + Barrier(args); + for(int l = 0; l < cudaGraphLaunches; l++) + { + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphLaunch(graphExec[i], args->streams[i])); + } + } + } +#endif + + TESTCHECK(completeColl(args)); + +#if HIP_VERSION >= 50221310 + if(cudaGraphLaunches >= 1) + { + // destroy cuda graph + for(int i = 0; i < args->nGpus; i++) + { + CUDACHECK(cudaGraphExecDestroy(graphExec[i])); + CUDACHECK(cudaGraphDestroy(graphs[i])); + } + } +#endif + + // Benchmark + long repeat = run_cycles; + size_t iter = 0; + + do + { + if(run_cycles > 1) PRINT("# Testing %lu cycle.\n", iter + 1); + if(args->reporter) + { + args->reporter->setParameters(iter, args->collTest->name, typeName, opName); + } + for(size_t size = args->minbytes; size <= args->maxbytes; + size = ((args->stepfactor > 1) ? 
size * args->stepfactor + : size + args->stepbytes)) + { + setupArgs(size, type, args); + char rootName[100]; + sprintf(rootName, "%6i", root); + PRINT("%12li %12li %8s %6s %6s", + std::max(args->sendBytes, args->expectedBytes), + args->nbytes / wordSize(type), typeName, opName, rootName); + if(enable_out_of_place) + { + TESTCHECK(BenchTime(args, type, op, root, 0)); + usleep(delay_inout_place); + } + if(enable_in_place) TESTCHECK(BenchTime(args, type, op, root, 1)); + PRINT("\n"); + } + --repeat; + ++iter; + } while(repeat != 0); + + return testSuccess; +} + +testResult_t +threadRunTests(struct threadArgs* args) +{ + // Set device to the first of our GPUs. If we don't do that, some operations + // will be done on the current GPU (by default : 0) and if the GPUs are in + // exclusive mode those operations will fail. + CUDACHECK(cudaSetDevice(args->gpus[0])); + TESTCHECK(ncclTestEngine.runTest(args, ncclroot, (ncclDataType_t) nccltype, + test_typenames[nccltype], (ncclRedOp_t) ncclop, + test_opnames[ncclop])); + return testSuccess; +} + +testResult_t +threadInit(struct threadArgs* args) +{ + char hostname[1024]; + getHostName(hostname, 1024); + int nranks = args->nProcs * args->nThreads * args->nGpus; + + // set main thread again + is_main_thread = (is_main_proc && args->thread == 0) ? 1 : 0; + + NCCLCHECK(ncclGroupStart()); + for(int i = 0; i < args->nGpus; i++) + { + int rank = + args->proc * args->nThreads * args->nGpus + args->thread * args->nGpus + i; + CUDACHECK(cudaSetDevice(args->gpus[i])); + NCCLCHECK(ncclCommInitRank(args->comms + i, nranks, args->ncclId, rank)); + } + NCCLCHECK(ncclGroupEnd()); +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) + void** sendRegHandles = + (local_register) ? (void**) malloc(sizeof(*sendRegHandles) * args->nGpus) : NULL; + void** recvRegHandles = + (local_register) ? 
(void**) malloc(sizeof(*recvRegHandles) * args->nGpus) : NULL; + for(int i = 0; i < args->nGpus; i++) + { + if(local_register) + NCCLCHECK(ncclCommRegister(args->comms[i], args->sendbuffs[i], args->maxbytes, + &sendRegHandles[i])); + if(local_register) + NCCLCHECK(ncclCommRegister(args->comms[i], args->recvbuffs[i], args->maxbytes, + &recvRegHandles[i])); + } +#endif + + TESTCHECK(threadRunTests(args)); + + for(int i = 0; i < args->nGpus; i++) + { +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) + if(local_register) + NCCLCHECK(ncclCommDeregister(args->comms[i], sendRegHandles[i])); + if(local_register) + NCCLCHECK(ncclCommDeregister(args->comms[i], recvRegHandles[i])); +#endif + NCCLCHECK(ncclCommDestroy(args->comms[i])); + } + return testSuccess; +} + +void* +threadLauncher(void* thread_) +{ + struct testThread* thread = (struct testThread*) thread_; + thread->ret = thread->func(&thread->args); + return NULL; +} +testResult_t +threadLaunch(struct testThread* thread) +{ + pthread_create(&thread->thread, NULL, threadLauncher, thread); + return testSuccess; +} + +testResult_t +AllocateBuffs(void** sendbuff, size_t sendBytes, void** recvbuff, size_t recvBytes, + void** expected, size_t nbytes) +{ + if(enable_rotating_tensor) + { + recvBytes = recvBytes + cache_bytes; + nbytes = nbytes + cache_bytes; + } + if(memorytype == ncclFine) + { + if(HIP_VERSION >= 50700000) + { + CUDACHECK(hipExtMallocWithFlags(sendbuff, nbytes, hipDeviceMallocUncached)); + CUDACHECK(hipExtMallocWithFlags(recvbuff, nbytes, hipDeviceMallocUncached)); + if(datacheck) + CUDACHECK( + hipExtMallocWithFlags(expected, recvBytes, hipDeviceMallocUncached)); + } + else + { + CUDACHECK( + hipExtMallocWithFlags(sendbuff, nbytes, hipDeviceMallocFinegrained)); + CUDACHECK( + hipExtMallocWithFlags(recvbuff, nbytes, hipDeviceMallocFinegrained)); + if(datacheck) + CUDACHECK(hipExtMallocWithFlags(expected, recvBytes, + hipDeviceMallocFinegrained)); + } + } + else if(memorytype == ncclHost) + { + 
CUDACHECK(hipHostMalloc(sendbuff, nbytes)); + CUDACHECK(hipHostMalloc(recvbuff, nbytes)); + if(datacheck) CUDACHECK(hipHostMalloc(expected, recvBytes)); + } + else if(memorytype == ncclManaged) + { + CUDACHECK(cudaMallocManaged(sendbuff, nbytes)); + CUDACHECK(cudaMallocManaged(recvbuff, nbytes)); + if(datacheck) CUDACHECK(cudaMallocManaged(expected, recvBytes)); +#if 0 + CUDACHECK(cudaMemset(*sendbuff, 0, nbytes)); + CUDACHECK(cudaMemset(*recvbuff, 0, nbytes)); + if (datacheck) CUDACHECK(cudaMemset(*expected, 0, recvBytes)); +#endif + } + else + { + CUDACHECK(cudaMalloc(sendbuff, nbytes)); + CUDACHECK(cudaMalloc(recvbuff, nbytes)); + if(datacheck) CUDACHECK(cudaMalloc(expected, recvBytes)); + } + CUDACHECK(hipMemset(*sendbuff, 1, nbytes)); + if(datacheck) CUDACHECK(hipMemset(*expected, 1, recvBytes)); + return testSuccess; +} + +testResult_t +run(); // Main function + +int +main(int argc, char* argv[]) +{ + // Make sure everyline is flushed so that we see the progress of the test + setlinebuf(stdout); + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 4, 0) + ncclGetVersion(&test_ncclVersion); +#else + test_ncclVersion = NCCL_VERSION_CODE; +#endif + +// printf("# NCCL_VERSION_CODE=%d ncclGetVersion=%d\n", NCCL_VERSION_CODE, +// test_ncclVersion); +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 0, 0) + test_opnum = 4; + test_typenum = 9; + if(NCCL_VERSION_CODE >= NCCL_VERSION(2, 10, 0) && + test_ncclVersion >= NCCL_VERSION(2, 10, 0)) + { + test_opnum++; // ncclAvg +# if defined(RCCL_BFLOAT16) + test_typenum++; // bfloat16 +# endif +# if defined(RCCL_FLOAT8) + test_typenum++; // fp8_e4m3 + test_typenum++; // fp8_e5m2 +# endif + } + if(NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) && + test_ncclVersion >= NCCL_VERSION(2, 11, 0)) + { + test_opnum++; // PreMulSum + } +#endif + + // Parse args + // Replace getopt_long with manual argument parsing + double parsed; + for(int argi = 1; argi < argc; ++argi) + { + const char* arg = argv[argi]; + if(strcmp(arg, "-t") == 0 || strcmp(arg, 
"--nthreads") == 0) + { + nThreads = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-g") == 0 || strcmp(arg, "--ngpus") == 0) + { + nGpus = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-b") == 0 || strcmp(arg, "--minbytes") == 0) + { + parsed = parsesize(argv[++argi]); + if(parsed < 0) + { + fprintf(stderr, "invalid size specified for 'minbytes'\n"); + return -1; + } + minBytes = (size_t) parsed; + } + else if(strcmp(arg, "-e") == 0 || strcmp(arg, "--maxbytes") == 0) + { + parsed = parsesize(argv[++argi]); + if(parsed < 0) + { + fprintf(stderr, "invalid size specified for 'maxbytes'\n"); + return -1; + } + maxBytes = (size_t) parsed; + } + else if(strcmp(arg, "-i") == 0 || strcmp(arg, "--stepbytes") == 0) + { + parsed = parsesize(argv[++argi]); + if(parsed < 0) + { + fprintf(stderr, "invalid size specified for 'stepBytes'\n"); + return -1; + } + stepBytes = (size_t) parsed; + } + else if(strcmp(arg, "-f") == 0 || strcmp(arg, "--stepfactor") == 0) + { + stepFactor = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-n") == 0 || strcmp(arg, "--iters") == 0) + { + iters = (int) strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-m") == 0 || strcmp(arg, "--agg_iters") == 0) + { +#if NCCL_MAJOR > 2 || (NCCL_MAJOR >= 2 && NCCL_MINOR >= 2) + agg_iters = (int) strtol(argv[++argi], NULL, 0); +#else + fprintf(stderr, "Option -m not supported before NCCL 2.2. 
Ignoring\n"); + ++argi; +#endif + } + else if(strcmp(arg, "-w") == 0 || strcmp(arg, "--warmup_iters") == 0) + { + warmup_iters = (int) strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-N") == 0 || strcmp(arg, "--run_cycles") == 0) + { + run_cycles = (int) strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-p") == 0 || strcmp(arg, "--parallel_init") == 0) + { + parallel_init = (int) strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-c") == 0 || strcmp(arg, "--check") == 0) + { + datacheck = (int) strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-o") == 0 || strcmp(arg, "--op") == 0) + { + ncclop = ncclstringtoop(argv[++argi]); + } + else if(strcmp(arg, "-d") == 0 || strcmp(arg, "--datatype") == 0) + { + nccltype = ncclstringtotype(argv[++argi]); + } + else if(strcmp(arg, "-r") == 0 || strcmp(arg, "--root") == 0) + { + ncclroot = ncclstringtoroot(argv[++argi]); + } + else if(strcmp(arg, "-z") == 0 || strcmp(arg, "--blocking") == 0) + { + blocking_coll = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-y") == 0 || strcmp(arg, "--stream_null") == 0) + { + streamnull = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-T") == 0 || strcmp(arg, "--timeout") == 0) + { + timeout = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-G") == 0 || strcmp(arg, "--cudagraph") == 0) + { +#if(NCCL_MAJOR > 2 || (NCCL_MAJOR >= 2 && NCCL_MINOR >= 9)) && HIP_VERSION >= 50221310 + cudaGraphLaunches = strtol(argv[++argi], NULL, 0); +#else + printf("Option -G (HIP graph) not supported before NCCL 2.9 + ROCm 5.2 " + "Ignoring\n"); + ++argi; +#endif + } + else if(strcmp(arg, "-C") == 0 || strcmp(arg, "--report_cputime") == 0) + { + report_cputime = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-a") == 0 || strcmp(arg, "--average") == 0) + { + average = (int) strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-R") == 0 || strcmp(arg, "--local_register") == 0) + { +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) + 
if((int) strtol(argv[++argi], NULL, 0)) + { + local_register = 1; + } +#else + printf("Option -R (register) is not supported before NCCL 2.19. Ignoring\n"); + ++argi; +#endif + } + else if(strcmp(arg, "-Y") == 0 || strcmp(arg, "--memory_type") == 0) + { + memorytype = ncclstringtomtype(argv[++argi]); + } + else if(strcmp(arg, "-u") == 0 || strcmp(arg, "--cumask") == 0) + { + int nmasks = 0; + char* maskstr = argv[++argi]; + char* mask = strtok(maskstr, ","); + while(mask != NULL && nmasks < 4) + { + cumask[nmasks++] = strtol(mask, NULL, 16); + mask = strtok(NULL, ","); + } + } + else if(strcmp(arg, "-O") == 0 || strcmp(arg, "--out_of_place") == 0) + { + enable_out_of_place = strtol(argv[++argi], NULL, 0); + enable_in_place = enable_out_of_place ? 0 : 1; + } + else if(strcmp(arg, "-q") == 0 || strcmp(arg, "--delay_inout_place") == 0) + { + delay_inout_place = (int) strtol(argv[++argi], NULL, 10); + } + else if(strcmp(arg, "-F") == 0 || strcmp(arg, "--cache_flush") == 0) + { + enable_cache_flush = strtol(argv[++argi], NULL, 0); + if(enable_cache_flush > 0) + { + hipDeviceProp_t deviceProps; + CHECK_HIP_ERROR(hipGetDeviceProperties(&deviceProps, 0)); + gpu_block3 = deviceProps.multiProcessorCount * 60; + } + } + else if(strcmp(arg, "-E") == 0 || strcmp(arg, "--rotating_tensor") == 0) + { + enable_rotating_tensor = strtol(argv[++argi], NULL, 0); + } + else if(strcmp(arg, "-x") == 0 || strcmp(arg, "--output_file") == 0) + { + output_file = argv[++argi]; + } + else if(strcmp(arg, "-Z") == 0 || strcmp(arg, "--output_format") == 0) + { + output_format = argv[++argi]; + } + else if(strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) + { + printf("USAGE: %s \n\t" + "[-t,--nthreads ] \n\t" + "[-g,--ngpus ] \n\t" + "[-b,--minbytes ] \n\t" + "[-e,--maxbytes ] \n\t" + "[-i,--stepbytes ] \n\t" + "[-f,--stepfactor ] \n\t" + "[-n,--iters ] \n\t" + "[-m,--agg_iters ] \n\t" + "[-w,--warmup_iters ] \n\t" + "[-N,--run_cycles run & print each cycle (default: 1; " + "0=infinite)] \n\t" 
+ "[-p,--parallel_init <0/1>] \n\t" + "[-c,--check ] \n\t" +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) + "[-o,--op ] \n\t" +#elif NCCL_VERSION_CODE >= NCCL_VERSION(2, 10, 0) + "[-o,--op ] \n\t" +#else + "[-o,--op ] \n\t" +#endif + "[-d,--datatype ] \n\t" + "[-r,--root ] \n\t" + "[-z,--blocking <0/1>] \n\t" + "[-y,--stream_null <0/1>] \n\t" + "[-T,--timeout