diff --git a/projects/rocprofiler-compute/.github/CODEOWNERS b/projects/rocprofiler-compute/.github/CODEOWNERS
index f2e47c5c3a..56098b9c73 100644
--- a/projects/rocprofiler-compute/.github/CODEOWNERS
+++ b/projects/rocprofiler-compute/.github/CODEOWNERS
@@ -1,6 +1,7 @@
 * @koomie @coleramos425
 
 # Documentation files
-docs/* @ROCm/rocm-documentation
+docs/ @ROCm/rocm-documentation
 *.md @ROCm/rocm-documentation
 *.rst @ROCm/rocm-documentation
+.readthedocs.yaml @ROCm/rocm-documentation
diff --git a/projects/rocprofiler-compute/.github/dependabot.yml b/projects/rocprofiler-compute/.github/dependabot.yml
new file mode 100644
index 0000000000..48d6228bad
--- /dev/null
+++ b/projects/rocprofiler-compute/.github/dependabot.yml
@@ -0,0 +1,18 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "pip" # See documentation for possible values
+    directory: "/docs/sphinx" # Location of package manifests
+    open-pull-requests-limit: 10
+    schedule:
+      interval: "daily"
+    target-branch: "dev"
+    labels:
+      - "documentation"
+      - "dependencies"
+    reviewers:
+      - "samjwu"
diff --git a/projects/rocprofiler-compute/.github/workflows/docs.yml b/projects/rocprofiler-compute/.github/workflows/docs.yml
index 843537adab..7b5f30bbcc 100644
--- a/projects/rocprofiler-compute/.github/workflows/docs.yml
+++ b/projects/rocprofiler-compute/.github/workflows/docs.yml
@@ -4,10 +4,9 @@ on:
   push:
     branches: ["main"]
     paths:
-      - 'src/docs'
-      - 'src/archive/docs-1.x'
+      - 'docs/archive/docs-2.x/**'
+      - 'docs/archive/docs-1.x/**'
       - '.github/workflows/docs.yml'
-      - 'VERSION'
 
   workflow_dispatch:
 
@@ -31,24 +30,24 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
       - name: Additional python packages
-        run: pip3 install -r requirements-doc.txt
+        run: pip3 install -r docs/archive/requirements-doc.txt
       - name: Setup Pages
         uses: actions/configure-pages@v4
       - name: Build 1.x docs
         run: |
-          cd src/archive/docs-1.x
+          cd docs/archive/docs-1.x
           make html	
-      - name: Build current docs
+      - name: Build 2.x docs
         run: |
-          cd src/docs
+          cd docs/archive/docs-2.x
           make html
       - name: Relocate 1.x docs
         run: |
-          mv src/archive/docs-1.x/_build/html src/docs/_build/html/1.x
+          mv docs/archive/docs-1.x/_build/html docs/archive/docs-2.x/_build/html/1.x
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
         with:
-          path: ./src/docs/_build/html
+          path: ./docs/archive/docs-2.x/_build/html
 
   # Deployment job
   deploy:
diff --git a/projects/rocprofiler-compute/.gitignore b/projects/rocprofiler-compute/.gitignore
index 4d6df9d13e..3b3a34d40e 100644
--- a/projects/rocprofiler-compute/.gitignore
+++ b/projects/rocprofiler-compute/.gitignore
@@ -19,3 +19,8 @@ VERSION.sha
 
 # temp files
 /tests/Testing
+
+# documentation artifacts
+/_build
+_toc.yml
+
diff --git a/projects/rocprofiler-compute/.readthedocs.yaml b/projects/rocprofiler-compute/.readthedocs.yaml
new file mode 100644
index 0000000000..ed04e0a35d
--- /dev/null
+++ b/projects/rocprofiler-compute/.readthedocs.yaml
@@ -0,0 +1,13 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.10"
+
+python:
+  install:
+  - requirements: docs/sphinx/requirements.txt
diff --git a/projects/rocprofiler-compute/CMakeLists.txt b/projects/rocprofiler-compute/CMakeLists.txt
index 7c7a69595f..9a1632a112 100644
--- a/projects/rocprofiler-compute/CMakeLists.txt
+++ b/projects/rocprofiler-compute/CMakeLists.txt
@@ -189,46 +189,51 @@ message(STATUS "Pytest CPU threadcount: ${PYTEST_NUMPROCS}")
 
 add_test(
     NAME test_profile_kernel_execution
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m kernel_execution --junitxml=tests/test_profile_kernel_execution.xml
-            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND
+        ${Python3_EXECUTABLE} -m pytest -m kernel_execution
+        --junitxml=tests/test_profile_kernel_execution.xml ${COV_OPTION}
+        ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 add_test(
     NAME test_profile_ipblocks
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m block --junitxml=tests/test_profile_blocks.xml ${COV_OPTION}
-            ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND
+        ${Python3_EXECUTABLE} -m pytest -m block --junitxml=tests/test_profile_blocks.xml
+        ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 set_property(TEST test_profile_ipblocks PROPERTY COST 11)
 
 add_test(
     NAME test_profile_dispatch
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m dispatch --junitxml=tests/test_profile_dispatch.xml ${COV_OPTION}
-            ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND
+        ${Python3_EXECUTABLE} -m pytest -m dispatch
+        --junitxml=tests/test_profile_dispatch.xml ${COV_OPTION}
+        ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
-set_property(TEST test_profile_ipblocks PROPERTY COST 5)
+set_property(TEST test_profile_dispatch PROPERTY COST 5)
 
 add_test(
     NAME test_profile_mem
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m mem --junitxml=tests/test_profile_mem.xml ${COV_OPTION}
-            ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND ${Python3_EXECUTABLE} -m pytest -m mem --junitxml=tests/test_profile_mem.xml
+            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 add_test(
     NAME test_profile_join
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m join --junitxml=tests/test_profile_join.xml ${COV_OPTION}
-            ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND ${Python3_EXECUTABLE} -m pytest -m join --junitxml=tests/test_profile_join.xml
+            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 add_test(
     NAME test_profile_sort
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m sort --junitxml=tests/test_profile_sort.xml ${COV_OPTION}
-            ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND ${Python3_EXECUTABLE} -m pytest -m sort --junitxml=tests/test_profile_sort.xml
+            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 add_test(
     NAME test_profile_misc
-    COMMAND ${Python3_EXECUTABLE} -m pytest -m misc --junitxml=tests/test_profile_misc.xml ${COV_OPTION}
-            ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    COMMAND ${Python3_EXECUTABLE} -m pytest -m misc --junitxml=tests/test_profile_misc.xml
+            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 set_tests_properties(
@@ -247,8 +252,10 @@ set_tests_properties(
 
 add_test(
     NAME test_analyze_commands
-    COMMAND ${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS} --junitxml=tests/test_analyze_commands.xml
-            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_analyze_commands.py
+    COMMAND
+        ${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS}
+        --junitxml=tests/test_analyze_commands.xml ${COV_OPTION}
+        ${PROJECT_SOURCE_DIR}/tests/test_analyze_commands.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 # ---------------------------
@@ -257,8 +264,10 @@ add_test(
 
 add_test(
     NAME test_analyze_workloads
-    COMMAND ${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS} --junitxml=tests/test_analyze_workloads.xml
-            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_analyze_workloads.py
+    COMMAND
+        ${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS}
+        --junitxml=tests/test_analyze_workloads.xml ${COV_OPTION}
+        ${PROJECT_SOURCE_DIR}/tests/test_analyze_workloads.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
 # ---------
diff --git a/projects/rocprofiler-compute/README.md b/projects/rocprofiler-compute/README.md
index 3be1182bdd..cc8cc8b0d1 100644
--- a/projects/rocprofiler-compute/README.md
+++ b/projects/rocprofiler-compute/README.md
@@ -4,17 +4,17 @@
 [![Docs](https://github.com/ROCm/omniperf/actions/workflows/docs.yml/badge.svg)](https://rocm.github.io/omniperf/)
 [![DOI](https://zenodo.org/badge/561919887.svg)](https://zenodo.org/badge/latestdoi/561919887)
 
-
 # Omniperf
 
 ## General
+
 Omniperf is a system performance profiling tool for machine
 learning/HPC workloads running on AMD MI GPUs. The tool presently
 targets usage on MI100, MI200, and MI300 accelerators.
 
 * For more information on available features, installation steps, and
 workload profiling and analysis, please refer to the online
-[documentation](https://rocm.github.io/omniperf).
+[documentation](https://rocm.docs.amd.com/projects/omniperf/en/latest/).
 
 * Omniperf is an AMD open source research project and is not supported
 as part of the ROCm software stack. We welcome contributions and
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/.gitignore b/projects/rocprofiler-compute/docs/archive/docs-1.x/.gitignore
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/.gitignore
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/.gitignore
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/.nojekyll b/projects/rocprofiler-compute/docs/archive/docs-1.x/.nojekyll
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/.nojekyll
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/.nojekyll
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/Makefile b/projects/rocprofiler-compute/docs/archive/docs-1.x/Makefile
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/Makefile
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/Makefile
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/README b/projects/rocprofiler-compute/docs/archive/docs-1.x/README
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/README
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/README
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/VERSION b/projects/rocprofiler-compute/docs/archive/docs-1.x/VERSION
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/VERSION
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/VERSION
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/analysis.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/analysis.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/analysis.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/analysis.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/conf.py b/projects/rocprofiler-compute/docs/archive/docs-1.x/conf.py
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/conf.py
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/conf.py
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/faq.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/faq.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/faq.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/faq.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/getting_started.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/getting_started.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/high_level_design.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/high_level_design.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/high_level_design.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/high_level_design.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Arithmetic_operations.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Arithmetic_operations.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Arithmetic_operations.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Arithmetic_operations.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Command_processor.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Command_processor.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Command_processor.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Command_processor.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Comp_pipe_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Comp_pipe_sol.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Comp_pipe_sol.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Comp_pipe_sol.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Compute_pipeline_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Compute_pipeline_stats.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Compute_pipeline_stats.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Compute_pipeline_stats.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Constant_cache_l2_interface.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_l2_interface.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Constant_cache_l2_interface.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_l2_interface.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Constant_cache_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_stats.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Constant_cache_stats.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Constant_cache_stats.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Current_and_baseline_dispatch_ids.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Instruc_cache_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruc_cache_sol.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Instruc_cache_sol.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruc_cache_sol.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Instruction_cache_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_cache_stats.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Instruction_cache_stats.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_cache_stats.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Instruction_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_mix.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Instruction_mix.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Instruction_mix.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Kernel_time_histogram.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Kernel_time_histogram.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Kernel_time_histogram.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Kernel_time_histogram.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L1D_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1D_sol.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L1D_sol.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1D_sol.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_cache_stalls.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_cache_stalls.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_cache_stalls.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_cache_stalls.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_l2_transactions.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_l2_transactions.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_l2_transactions_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_l2_transactions_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_l2_transactions_per_channel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_utcl1_transactions.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_utcl1_transactions.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L1_utcl1_transactions.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L1_utcl1_transactions.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_cache_accesses.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_accesses.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_cache_accesses.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_accesses.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_cache_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_sol.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_cache_sol.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_cache_sol.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_latencies_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_latencies_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_latencies_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_latencies_per_channel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_stalls.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_stalls.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_stalls_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_stalls_per_channel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_transactions.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_transactions.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_transactions_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_transactions_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_transactions_per_channel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_stalls_per_channel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/L2_ea_write_starvation_per_channel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/LDS_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_sol.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/LDS_sol.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_sol.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/LDS_stats.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_stats.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/LDS_stats.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/LDS_stats.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/MFMA_arithmetic_instruction_mix.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Memory_chart_analysis.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_chart_analysis.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Memory_chart_analysis.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_chart_analysis.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Memory_latencies.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_latencies.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Memory_latencies.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Memory_latencies.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Roofline_analysis.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Roofline_analysis.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Roofline_analysis.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Roofline_analysis.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Shader_processing_input.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Shader_processing_input.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Shader_processing_input.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Shader_processing_input.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/System_info_panel.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_info_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/System_info_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_info_panel.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/System_speed_of_light.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_speed_of_light.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/System_speed_of_light.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/System_speed_of_light.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Texture_address.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_address.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Texture_address.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_address.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Texture_data.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_data.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Texture_data.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Texture_data.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Top_bottleneck_dispatches.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_dispatches.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Top_bottleneck_dispatches.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_dispatches.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Top_bottleneck_kernels.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_kernels.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Top_bottleneck_kernels.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Top_bottleneck_kernels.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/VALU_arithmetic_instruction_mix.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/VMEM_arithmetic_intensity_mix.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Vec_L1D_cache_accesses.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_accesses.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Vec_L1D_cache_accesses.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_accesses.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Vec_L1D_cache_sol.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_sol.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Vec_L1D_cache_sol.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Vec_L1D_cache_sol.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/Wavefront_launch.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/Wavefront_launch.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/Wavefront_launch.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/Wavefront_launch.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/datasource_config.jpg b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_config.jpg
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/datasource_config.jpg
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_config.jpg
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/datasource_settings.jpg b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_settings.jpg
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/datasource_settings.jpg
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/datasource_settings.jpg
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/global_variables.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/global_variables.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/global_variables.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/global_variables.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/grafana_welcome.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_welcome.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/grafana_welcome.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_welcome.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/grafana_workload_selection.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_workload_selection.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/grafana_workload_selection.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/grafana_workload_selection.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/import_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/import_dashboard.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/import_dashboard.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/import_dashboard.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/install_decision_tree.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/install_decision_tree.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/install_decision_tree.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/install_decision_tree.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/omniperf_architecture.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_architecture.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/omniperf_architecture.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_architecture.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/omniperf_server_vs_client_install.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_server_vs_client_install.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/omniperf_server_vs_client_install.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/omniperf_server_vs_client_install.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/opening_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/opening_dashboard.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/opening_dashboard.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/opening_dashboard.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/sample-roof-plot.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/sample-roof-plot.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/sample-roof-plot.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/sample-roof-plot.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/standalone_gui.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/standalone_gui.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/standalone_gui.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/standalone_gui.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/tunnel_demo1.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo1.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/tunnel_demo1.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo1.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/tunnel_demo2.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo2.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/tunnel_demo2.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo2.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/images/tunnel_demo3.png b/projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo3.png
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/images/tunnel_demo3.png
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/images/tunnel_demo3.png
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/index.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/index.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/index.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/index.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/installation.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/installation.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/installation.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/installation.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/introduction.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/introduction.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/introduction.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/introduction.md
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/make.bat b/projects/rocprofiler-compute/docs/archive/docs-1.x/make.bat
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/make.bat
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/make.bat
diff --git a/projects/rocprofiler-compute/src/archive/docs-1.x/profiling.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md
similarity index 100%
rename from projects/rocprofiler-compute/src/archive/docs-1.x/profiling.md
rename to projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md
diff --git a/projects/rocprofiler-compute/src/docs/.gitignore b/projects/rocprofiler-compute/docs/archive/docs-2.x/.gitignore
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/.gitignore
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/.gitignore
diff --git a/projects/rocprofiler-compute/src/docs/.nojekyll b/projects/rocprofiler-compute/docs/archive/docs-2.x/.nojekyll
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/.nojekyll
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/.nojekyll
diff --git a/projects/rocprofiler-compute/src/docs/Makefile b/projects/rocprofiler-compute/docs/archive/docs-2.x/Makefile
similarity index 94%
rename from projects/rocprofiler-compute/src/docs/Makefile
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/Makefile
index 41c270bb32..c3854a5224 100644
--- a/projects/rocprofiler-compute/src/docs/Makefile
+++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/Makefile
@@ -6,7 +6,7 @@
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
-BUILDDIR      = _build
+BUILDDIR      = ../_build
 
 # Put it first so that "make" without argument is like "make help".
 help:
@@ -17,4 +17,4 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/projects/rocprofiler-compute/src/docs/README b/projects/rocprofiler-compute/docs/archive/docs-2.x/README
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/README
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/README
diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/VERSION b/projects/rocprofiler-compute/docs/archive/docs-2.x/VERSION
new file mode 100644
index 0000000000..38f77a65b3
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/VERSION
@@ -0,0 +1 @@
+2.0.1
diff --git a/projects/rocprofiler-compute/src/docs/_static/css/custom.css b/projects/rocprofiler-compute/docs/archive/docs-2.x/_static/css/custom.css
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/_static/css/custom.css
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/_static/css/custom.css
diff --git a/projects/rocprofiler-compute/src/docs/analysis.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/analysis.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/analysis.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/analysis.md
diff --git a/projects/rocprofiler-compute/src/docs/conf.py b/projects/rocprofiler-compute/docs/archive/docs-2.x/conf.py
similarity index 98%
rename from projects/rocprofiler-compute/src/docs/conf.py
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/conf.py
index 9cedb65a5c..b8ff5a0059 100644
--- a/projects/rocprofiler-compute/src/docs/conf.py
+++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/conf.py
@@ -20,8 +20,8 @@ sys.path.insert(0, os.path.abspath(".."))
 
 repo_version = "unknown"
 # Determine short version by file in repo
-if os.path.isfile("../../VERSION"):
-    with open("../../VERSION") as f:
+if os.path.isfile("./VERSION"):
+    with open("./VERSION") as f:
         repo_version = f.readline().strip()
 
 
diff --git a/projects/rocprofiler-compute/src/docs/faq.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/faq.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/faq.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/faq.md
diff --git a/projects/rocprofiler-compute/src/docs/getting_started.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/getting_started.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md
diff --git a/projects/rocprofiler-compute/src/docs/high_level_design.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/high_level_design.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/high_level_design.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/high_level_design.md
diff --git a/projects/rocprofiler-compute/src/docs/images/Current_and_baseline_dispatch_ids.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Current_and_baseline_dispatch_ids.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/Current_and_baseline_dispatch_ids.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/Current_and_baseline_dispatch_ids.png
diff --git a/projects/rocprofiler-compute/src/docs/images/Kernel_time_histogram.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Kernel_time_histogram.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/Kernel_time_histogram.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/Kernel_time_histogram.png
diff --git a/projects/rocprofiler-compute/src/docs/images/L1_l2_transactions_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L1_l2_transactions_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/L1_l2_transactions_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/L1_l2_transactions_per_channel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/L2_ea_latencies_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_latencies_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/L2_ea_latencies_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_latencies_per_channel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/L2_ea_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_stalls_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/L2_ea_stalls_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_stalls_per_channel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/L2_ea_write_stalls_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_stalls_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/L2_ea_write_stalls_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_stalls_per_channel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/L2_ea_write_starvation_per_channel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_starvation_per_channel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/L2_ea_write_starvation_per_channel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/L2_ea_write_starvation_per_channel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/Memory_latencies.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Memory_latencies.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/Memory_latencies.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/Memory_latencies.png
diff --git a/projects/rocprofiler-compute/src/docs/images/Roofline_analysis.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Roofline_analysis.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/Roofline_analysis.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/Roofline_analysis.png
diff --git a/projects/rocprofiler-compute/src/docs/images/Top_bottleneck_dispatches.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_dispatches.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/Top_bottleneck_dispatches.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_dispatches.png
diff --git a/projects/rocprofiler-compute/src/docs/images/Top_bottleneck_kernels.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_kernels.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/Top_bottleneck_kernels.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/Top_bottleneck_kernels.png
diff --git a/projects/rocprofiler-compute/src/docs/images/amd-header-logo.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/amd-header-logo.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/amd-header-logo.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/amd-header-logo.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/cpc_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpc_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cpc_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpc_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cpf_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpf_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cpf_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cpf_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-arith-ops_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-arith-ops_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-arith-ops_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-arith-ops_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-inst-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-inst-mix_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-inst-mix_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-inst-mix_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-mafma-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-mafma-arith-instr-mix_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-mafma-arith-instr-mix_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-mafma-arith-instr-mix_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-pipeline-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-pipeline-stats_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-pipeline-stats_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-pipeline-stats_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-value-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-value-arith-instr-mix_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-value-arith-instr-mix_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-value-arith-instr-mix_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/cu-vmem-instr-mix_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-vmem-instr-mix_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/cu-vmem-instr-mix_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/cu-vmem-instr-mix_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/datasource_config.jpg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_config.jpg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/datasource_config.jpg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_config.jpg
diff --git a/projects/rocprofiler-compute/src/docs/images/datasource_settings.jpg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_settings.jpg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/datasource_settings.jpg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/datasource_settings.jpg
diff --git a/projects/rocprofiler-compute/src/docs/images/fabric.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/fabric.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.png
diff --git a/projects/rocprofiler-compute/src/docs/images/fabric.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/fabric.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/fabric.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/fig_level_counter.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/fig_level_counter.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/fig_level_counter.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/fig_level_counter.png
diff --git a/projects/rocprofiler-compute/src/docs/images/gcn_compute_unit.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/gcn_compute_unit.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/gcn_compute_unit.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/gcn_compute_unit.png
diff --git a/projects/rocprofiler-compute/src/docs/images/global_variables.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/global_variables.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/global_variables.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/global_variables.png
diff --git a/projects/rocprofiler-compute/src/docs/images/grafana_welcome.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_welcome.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/grafana_welcome.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_welcome.png
diff --git a/projects/rocprofiler-compute/src/docs/images/grafana_workload_selection.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_workload_selection.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/grafana_workload_selection.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/grafana_workload_selection.png
diff --git a/projects/rocprofiler-compute/src/docs/images/import_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/import_dashboard.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/import_dashboard.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/import_dashboard.png
diff --git a/projects/rocprofiler-compute/src/docs/images/install_decision_tree.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/install_decision_tree.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/install_decision_tree.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/install_decision_tree.png
diff --git a/projects/rocprofiler-compute/src/docs/images/instr-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-accesses_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/instr-cache-accesses_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-accesses_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/instr-cache-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/instr-cache-sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/instr-cache-sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/l1perf_model.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l1perf_model.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.png
diff --git a/projects/rocprofiler-compute/src/docs/images/l1perf_model.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l1perf_model.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l1perf_model.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/l2-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-accesses_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l2-accesses_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-accesses_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/l2-fabric-interface-stalls_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-interface-stalls_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l2-fabric-interface-stalls_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-interface-stalls_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/l2-fabric-transactions_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-transactions_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l2-fabric-transactions_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-fabric-transactions_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/l2-per-channel-agg-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-per-channel-agg-stats_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l2-per-channel-agg-stats_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-per-channel-agg-stats_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/l2-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/l2-sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/l2-sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/lds-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/lds-sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/lds-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-stats_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/lds-stats_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds-stats_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/lds.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/lds.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.png
diff --git a/projects/rocprofiler-compute/src/docs/images/lds.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/lds.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/lds.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/ldsbandwidth.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ldsbandwidth.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.png
diff --git a/projects/rocprofiler-compute/src/docs/images/ldsbandwidth.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ldsbandwidth.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsbandwidth.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/ldsconflictrate.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ldsconflictrate.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.png
diff --git a/projects/rocprofiler-compute/src/docs/images/ldsconflictrate.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ldsconflictrate.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflictrate.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/ldsconflicts.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ldsconflicts.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.png
diff --git a/projects/rocprofiler-compute/src/docs/images/ldsconflicts.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ldsconflicts.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ldsconflicts.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/memory-chart_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/memory-chart_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/memory-chart_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/memory-chart_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/nosplit.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/nosplit.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.png
diff --git a/projects/rocprofiler-compute/src/docs/images/nosplit.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/nosplit.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/nosplit.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/omniperf_architecture.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_architecture.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/omniperf_architecture.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_architecture.png
diff --git a/projects/rocprofiler-compute/src/docs/images/omniperf_server_vs_client_install.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_server_vs_client_install.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/omniperf_server_vs_client_install.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/omniperf_server_vs_client_install.png
diff --git a/projects/rocprofiler-compute/src/docs/images/opening_dashboard.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/opening_dashboard.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/opening_dashboard.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/opening_dashboard.png
diff --git a/projects/rocprofiler-compute/src/docs/images/roofline_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/roofline_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/roofline_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/roofline_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/sample-roof-plot.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sample-roof-plot.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/sample-roof-plot.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/sample-roof-plot.png
diff --git a/projects/rocprofiler-compute/src/docs/images/selayout.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/selayout.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/selayout.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/selayout.png
diff --git a/projects/rocprofiler-compute/src/docs/images/sl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-cache-accesses_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/sl1d-cache-accesses_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-cache-accesses_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/sl1d-l12-interface_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-l12-interface_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/sl1d-l12-interface_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-l12-interface_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/sl1d-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/sl1d-sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/sl1d-sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/spi-resource-allocation_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-resource-allocation_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/spi-resource-allocation_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-resource-allocation_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/spi-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-stats_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/spi-stats_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/spi-stats_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/split.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/split.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.png
diff --git a/projects/rocprofiler-compute/src/docs/images/split.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/split.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/split.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/standalone_gui.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/standalone_gui.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/standalone_gui.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/standalone_gui.png
diff --git a/projects/rocprofiler-compute/src/docs/images/system-info_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/system-info_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/system-info_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/system-info_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/ta_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/ta_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/ta_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/ta_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/td_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/td_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/td_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/td_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/top-stat_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/top-stat_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/top-stat_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/top-stat_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/tunnel_demo1.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo1.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/tunnel_demo1.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo1.png
diff --git a/projects/rocprofiler-compute/src/docs/images/tunnel_demo2.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo2.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/tunnel_demo2.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo2.png
diff --git a/projects/rocprofiler-compute/src/docs/images/tunnel_demo3.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo3.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/tunnel_demo3.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/tunnel_demo3.png
diff --git a/projects/rocprofiler-compute/src/docs/images/uncached.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/uncached.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.png
diff --git a/projects/rocprofiler-compute/src/docs/images/uncached.svg b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.svg
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/uncached.svg
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/uncached.svg
diff --git a/projects/rocprofiler-compute/src/docs/images/vl1d-addr-translation_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-addr-translation_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/vl1d-addr-translation_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-addr-translation_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/vl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-accesses_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/vl1d-cache-accesses_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-accesses_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/vl1d-cache-stalls_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-stalls_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/vl1d-cache-stalls_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-cache-stalls_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/vl1d-l2-transactions_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-l2-transactions_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/vl1d-l2-transactions_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-l2-transactions_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/vl1d-sol_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-sol_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/vl1d-sol_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/vl1d-sol_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/wavefront-launch-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-launch-stats_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/wavefront-launch-stats_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-launch-stats_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/images/wavefront-runtime-stats_panel.png b/projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-runtime-stats_panel.png
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/images/wavefront-runtime-stats_panel.png
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/images/wavefront-runtime-stats_panel.png
diff --git a/projects/rocprofiler-compute/src/docs/index.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/index.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/index.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/index.md
diff --git a/projects/rocprofiler-compute/src/docs/installation.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/installation.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/installation.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/installation.md
diff --git a/projects/rocprofiler-compute/src/docs/introduction.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/introduction.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/introduction.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/introduction.md
diff --git a/projects/rocprofiler-compute/src/docs/make.bat b/projects/rocprofiler-compute/docs/archive/docs-2.x/make.bat
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/make.bat
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/make.bat
diff --git a/projects/rocprofiler-compute/src/docs/performance_model.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/performance_model.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/performance_model.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/performance_model.md
diff --git a/projects/rocprofiler-compute/src/docs/profiling.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md
similarity index 100%
rename from projects/rocprofiler-compute/src/docs/profiling.md
rename to projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md
diff --git a/projects/rocprofiler-compute/requirements-doc.txt b/projects/rocprofiler-compute/docs/archive/requirements-doc.txt
similarity index 100%
rename from projects/rocprofiler-compute/requirements-doc.txt
rename to projects/rocprofiler-compute/docs/archive/requirements-doc.txt
diff --git a/projects/rocprofiler-compute/docs/conceptual/command-processor.rst b/projects/rocprofiler-compute/docs/conceptual/command-processor.rst
new file mode 100644
index 0000000000..a055768a1f
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/command-processor.rst
@@ -0,0 +1,154 @@
+.. meta::
+   :description: Omniperf performance model: Command processor (CP)
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, command, processor, fetcher, packet processor, CPF, CPC
+
+**********************
+Command processor (CP)
+**********************
+
+The command processor (CP) is responsible for interacting with the AMDGPU kernel
+driver -- the Linux kernel -- on the CPU and for interacting with user-space
+HSA clients when they submit commands to HSA queues. Basic tasks of the CP
+include reading commands (such as those corresponding to a kernel launch) out of
+:hsa-runtime-pdf:`HSA queues <68>`, scheduling work to subsequent parts of the
+scheduler pipeline, and marking kernels complete for synchronization events on
+the host.
+
+The command processor consists of two sub-components:
+
+* :ref:`Fetcher <cpf-metrics>` (CPF): Fetches commands out of memory to hand
+  them over to the CPC for processing.
+
+* :ref:`Packet processor <cpc-metrics>` (CPC): Micro-controller running the
+  command processing firmware that decodes the fetched commands and (for
+  kernels) passes them to the :ref:`workgroup processors <desc-spi>` for
+  scheduling.
+
+Before scheduling work to the accelerator, the command processor can
+first acquire a memory fence to ensure system consistency 
+(:hsa-runtime-pdf:`Section 2.6.4 <91>`). After the work is complete, the
+command processor can apply a memory-release fence. Depending on the AMD CDNA™
+accelerator in question, either of these operations *might* initiate a cache
+write-back or invalidation.
+
+Analyzing command processor performance is most interesting for kernels
+that you suspect to be limited by scheduling or launch rate. The command
+processor’s metrics therefore are focused on reporting, for example:
+
+*  Utilization of the fetcher
+
+*  Utilization of the packet processor, and decoding processing packets
+
+*  Stalls in fetching and processing
+
+.. _cpf-metrics:
+
+Command processor fetcher (CPF)
+===============================
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - CPF Utilization
+
+     - Percent of total cycles where the CPF was busy actively doing any work.
+       The ratio of CPF busy cycles over total cycles counted by the CPF.
+
+     - Percent
+
+   * - CPF Stall
+
+     - Percent of CPF busy cycles where the CPF was stalled for any reason.
+
+     - Percent
+
+   * - CPF-L2 Utilization
+
+     - Percent of total cycles counted by the CPF-:doc:`L2 <l2-cache>` interface
+       where the CPF-L2 interface was active doing any work. The ratio of CPF-L2
+       busy cycles over total cycles counted by the CPF-L2.
+
+     - Percent
+
+   * - CPF-L2 Stall
+
+     - Percent of CPF-:doc:`L2 <l2-cache>` busy cycles where the CPF-L2
+       interface was stalled for any reason.
+
+     - Percent
+
+   * - CPF-UTCL1 Stall
+
+     - Percent of CPF busy cycles where the CPF was stalled by address
+       translation. 
+
+     - Percent
+
+.. _cpc-metrics:
+
+Command processor packet processor (CPC)
+========================================
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - CPC Utilization
+
+     - Percent of total cycles where the CPC was busy actively doing any work.
+       The ratio of CPC busy cycles over total cycles counted by the CPC.
+
+     - Percent
+
+   * - CPC Stall
+
+     - Percent of CPC busy cycles where the CPC was stalled for any reason.
+
+     - Percent
+
+   * - CPC Packet Decoding Utilization
+
+     - Percent of CPC busy cycles spent decoding commands for processing.
+
+     - Percent
+
+   * - CPC-Workgroup Manager Utilization
+
+     - Percent of CPC busy cycles spent dispatching workgroups to the
+       :ref:`workgroup manager <desc-spi>`.
+
+     - Percent
+
+   * - CPC-L2 Utilization
+
+     - Percent of total cycles counted by the CPC-:doc:`L2 <l2-cache>` interface
+       where the CPC-L2 interface was active doing any work.
+
+     - Percent
+
+   * - CPC-UTCL1 Stall
+
+     - Percent of CPC busy cycles where the CPC was stalled by address
+       translation.
+
+     - Percent
+
+   * - CPC-UTCL2 Utilization
+
+     - Percent of total cycles counted by the CPC's :doc:`L2 <l2-cache>` address
+       translation interface where the CPC was busy doing address translation
+       work.
+
+     - Percent
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/compute-unit.rst b/projects/rocprofiler-compute/docs/conceptual/compute-unit.rst
new file mode 100644
index 0000000000..e7061c814e
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/compute-unit.rst
@@ -0,0 +1,60 @@
+.. meta::
+   :description: Omniperf performance model: Compute unit (CU)
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, GCN, compute, unit, pipeline, workgroup, wavefront,
+              CDNA
+
+*****************
+Compute unit (CU)
+*****************
+
+The compute unit (CU) is responsible for executing a user's kernels on
+CDNA™-based accelerators. All :ref:`wavefronts <desc-wavefront>` of a
+:ref:`workgroup <desc-workgroup>` are scheduled on the same CU.
+
+.. image:: ../data/performance-model/gcn_compute_unit.png
+   :align: center
+   :alt: AMD CDNA accelerator compute unit diagram
+   :width: 800 
+
+The CU consists of several independent execution pipelines and functional units.
+The :doc:`/conceptual/pipeline-descriptions` section details the various
+execution pipelines -- VALU, SALU, LDS, scheduler, and so forth. The metrics
+presented by Omniperf for these pipelines are described in
+:doc:`pipeline-metrics`. The :doc:`vL1D <vector-l1-cache>` cache and
+:doc:`LDS <local-data-share>` are described in their own sections.
+
+* The :ref:`desc-valu` is composed of multiple SIMD (single
+  instruction, multiple data) vector processors, vector general purpose
+  registers (VGPRs) and instruction buffers. The VALU is responsible for
+  executing much of the computational work on CDNA accelerators, including but
+  not limited to floating-point operations (FLOPs) and integer operations
+  (IOPs).
+
+* The vector memory (VMEM) unit is responsible for issuing loads, stores and
+  atomic operations that interact with the memory system.
+
+* The :ref:`desc-salu` is shared by all threads in a
+  :ref:`wavefront <desc-wavefront>`, and is responsible for executing
+  instructions that are known to be uniform across the wavefront at compile
+  time. The SALU has a memory unit (SMEM) for interacting with memory, but it
+  cannot issue separately from the SALU.
+
+* The :doc:`local-data-share` is an on-CU software-managed scratchpad memory
+  that can be used to efficiently share data between all threads in a
+  :ref:`workgroup <desc-workgroup>`.
+
+* The :ref:`desc-scheduler` is responsible for issuing and decoding instructions
+  for all the :ref:`wavefronts <desc-wavefront>` on the compute unit.
+
+* The :doc:`vector L1 data cache (vL1D) <vector-l1-cache>` is the first level
+  cache local to the compute unit. On current CDNA accelerators, the vL1D is
+  write-through. The vL1D caches from multiple compute units are kept coherent
+  with one another through software instructions.
+
+* CDNA accelerators -- that is, AMD Instinct™ MI100 and newer -- contain
+  specialized matrix-multiplication accelerator pipelines known as the
+  :ref:`desc-mfma`.
+
+For a more in-depth description of a compute unit on a CDNA accelerator, see
+:hip-training-pdf:`22` and :gcn-crash-course:`27`.
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/definitions.rst b/projects/rocprofiler-compute/docs/conceptual/definitions.rst
new file mode 100644
index 0000000000..8ad483094a
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/definitions.rst
@@ -0,0 +1,152 @@
+.. meta::
+   :description: Omniperf terminology and definitions
+   :keywords: Omniperf, ROCm, glossary, definitions, terms, profiler, tool,
+              Instinct, accelerator, AMD
+
+***********
+Definitions
+***********
+
+The following table briefly defines some terminology used in Omniperf interfaces
+and in this documentation.
+
+.. include:: ./includes/terms.rst
+
+.. include:: ./includes/normalization-units.rst
+
+.. _memory-spaces:
+
+Memory spaces
+=============
+
+AMD Instinct™ MI-series accelerators can access memory through multiple address spaces
+which may map to different physical memory locations on the system. The
+following table provides a view into how various types of memory used
+in HIP map onto these constructs:
+
+.. list-table::
+   :header-rows: 1
+
+   * - LLVM Address Space
+     - Hardware Memory Space
+     - HIP Terminology
+
+   * - Generic
+     - Flat
+     - N/A
+
+   * - Global
+     - Global
+     - Global
+
+   * - Local
+     - LDS
+     - LDS/Shared
+
+   * - Private
+     - Scratch
+     - Private
+
+   * - Constant
+     - Same as global
+     - Constant
+
+The following is a high-level description of the address spaces in the AMDGPU
+backend of LLVM:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Address space
+     - Description
+
+   * - Global
+     - Memory that can be seen by all threads in a process, and may be backed by
+       the local accelerator's HBM, a remote accelerator's HBM, or the CPU's
+       DRAM.
+
+   * - Local
+     - Memory that is only visible to a particular workgroup. On AMD's Instinct
+       accelerator hardware, this is stored in :doc:`LDS <local-data-share>`
+       memory.
+
+   * - Private
+     - Memory that is only visible to a particular :ref:`work-item <desc-work-item>`
+       (thread), stored in the scratch space on AMD's Instinct accelerators.
+
+   * - Constant
+     - Read-only memory that is in the global address space and stored on the
+       local accelerator's HBM.
+
+   * - Generic
+     - Used when the compiler cannot statically prove that a pointer is
+       addressing memory in a single (non-generic) address space. Mapped to Flat
+       on AMD's Instinct accelerators, the pointer could dynamically address
+       global, local, private or constant memory.
+
+`LLVM's documentation for AMDGPU Backend <https://llvm.org/docs/AMDGPUUsage.html#address-spaces>`_
+has the most up-to-date information. Refer to this source for a more complete
+explanation.
+
+.. _memory-type:
+
+Memory type
+===========
+
+AMD Instinct accelerators contain a number of different memory allocation
+types to enable the HIP language's
+:doc:`memory coherency model <hip:how-to/programming_manual>`.
+These memory types are broadly similar between AMD Instinct accelerator
+generations, but may differ in exact implementation.
+
+In addition, these memory types *might* differ between accelerators on the same
+system, even when accessing the same memory allocation.
+
+For example, an :ref:`MI2XX <mixxx-note>` accelerator accessing *fine-grained*
+memory allocated local to that device may see the allocation as coherently
+cacheable, while a remote accelerator might see the same allocation as
+*uncached*.
+
+These memory types include:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Memory type
+     - Description
+
+   * - Uncached Memory (UC)
+     - Memory that will not be cached in this accelerator. On
+       :ref:`MI2XX <mixxx-note>` accelerators, this corresponds to “fine-grained”
+       (or, “coherent”) memory allocated on a remote accelerator or the host,
+       for example, using ``hipHostMalloc`` or ``hipMallocManaged`` with default
+       allocation flags.
+
+   * - Non-hardware-Coherent Memory (NC)
+     - Memory that will be cached by the accelerator, and is only guaranteed to
+       be consistent at kernel boundaries / after software-driven
+       synchronization events. On :ref:`MI2XX <mixxx-note>` accelerators, this
+       type of memory maps to, for example, “coarse-grained” ``hipHostMalloc``’d
+       memory -- that is, allocated with the ``hipHostMallocNonCoherent``
+       flag -- or ``hipMalloc``’d memory allocated on a remote accelerator.
+
+   * - Coherently Cacheable (CC)
+     - Memory for which only reads from the accelerator where the memory was
+       allocated will be cached. Writes to CC memory are uncached, and trigger
+       invalidations of any line within this accelerator. On
+       :ref:`MI2XX <mixxx-note>` accelerators, this type of memory maps to
+       “fine-grained” memory allocated on the local accelerator using, for
+       example, the ``hipExtMallocWithFlags`` API using the
+       ``hipDeviceMallocFinegrained`` flag.
+
+   * - Read/Write Coherent Memory (RW)
+     - Memory that will be cached by the accelerator, but may be invalidated by
+       writes from remote devices at kernel boundaries / after software-driven
+       synchronization events. On :ref:`MI2XX <mixxx-note>` accelerators, this
+       corresponds to “coarse-grained” memory allocated locally to the
+       accelerator, using for example, the default ``hipMalloc`` allocator.
+
+Find a good discussion of coarse and fine-grained memory allocations and what
+type of memory is returned by various combinations of memory allocators, flags
+and arguments in the
+`Crusher quick-start guide <https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#floating-point-fp-atomic-operations-and-coarse-fine-grained-memory-allocations>`_.
diff --git a/projects/rocprofiler-compute/docs/conceptual/includes/normalization-units.rst b/projects/rocprofiler-compute/docs/conceptual/includes/normalization-units.rst
new file mode 100644
index 0000000000..34961f7e0a
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/includes/normalization-units.rst
@@ -0,0 +1,47 @@
+.. _normalization-units:
+
+Normalization units
+===================
+
+A user-configurable unit by which you can choose to normalize data. Options
+include:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Name
+     - Description
+
+   * - ``per_wave``
+     - The total value of the measured counter or metric that occurred per
+       kernel invocation divided by the total number of
+       :ref:`wavefronts <desc-wavefront>` launched in the kernel.
+
+   * - ``per_cycle``
+     - The total value of the measured counter or metric that occurred per
+       kernel invocation divided by the
+       :ref:`kernel cycles <kernel-cycles>`, that is, the total number of
+       cycles the kernel executed as measured by the
+       :doc:`command processor <command-processor>`.
+
+   * - ``per_kernel``
+     - The total value of the measured counter or metric that occurred per
+       kernel invocation.
+
+   * - ``per_second``
+     - The total value of the measured counter or metric that occurred per
+       kernel invocation divided by the :ref:`kernel time <kernel-time>`,
+       that is, the total runtime of the kernel in seconds, as measured by the
+       :doc:`command processor <command-processor>`.
+
+By default, Omniperf uses the ``per_wave`` normalization.
+
+.. tip::
+
+   The best normalization may vary depending on your use case. For instance, a
+   ``per_second`` normalization might be useful for FLOP or bandwidth
+   comparisons, while a ``per_wave`` normalization could be useful to see how many
+   (and what types) of instructions are used per wavefront. A ``per_kernel``
+   normalization can be useful to get the total aggregate values of metrics for
+   comparison between different configurations.
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/includes/terms.rst b/projects/rocprofiler-compute/docs/conceptual/includes/terms.rst
new file mode 100644
index 0000000000..bc0080bdb9
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/includes/terms.rst
@@ -0,0 +1,188 @@
+.. _desc-workgroup:
+
+.. _desc-work-item:
+
+.. _desc-wavefront:
+
+.. _desc-divergence:
+
+.. _kernel-time:
+
+.. _kernel-cycles:
+
+.. _total-active-cu-cycles:
+
+.. _total-cu-cycles:
+
+.. _total-se-cycles:
+
+.. _total-simd-cycles:
+
+.. _total-pipe-cycles:
+
+.. _total-l1i-cycles:
+
+.. _total-active-l2-cycles:
+
+.. _total-l2-cycles:
+
+.. _total-sl1d-cycles:
+
+.. _thread-requests:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Name
+
+     - Description
+
+     - Unit
+
+   * - Kernel time
+
+     - The number of seconds the accelerator was executing a kernel, from the
+       :doc:`command processor <command-processor>`'s (CP) start-of-kernel
+       timestamp (a number of cycles after the CP begins processing the packet)
+       to the CP's end-of-kernel timestamp (a number of cycles before the CP
+       stops processing the packet).
+
+     - Seconds
+
+   * - Kernel cycles
+
+     - The number of cycles the accelerator was active doing *any* work, as
+       measured by the :doc:`command processor <command-processor>` (CP).
+
+     - Cycles
+
+   * - Total CU cycles
+
+     - The number of cycles the accelerator was active doing *any* work
+       (that is, kernel cycles), multiplied by the number of
+       :doc:`compute units <compute-unit>` on the accelerator. A
+       measure of the total possible active cycles the compute units could be
+       doing work, useful for the normalization of metrics inside the CU.
+
+     - Cycles
+
+   * - Total active CU cycles
+
+     - The number of cycles a CU on the accelerator was active doing *any*
+       work, summed over all :doc:`compute units <compute-unit>` on the
+       accelerator.
+
+     - Cycles
+
+   * - Total SIMD cycles
+
+     - The number of cycles the accelerator was active doing *any* work (that
+       is, kernel cycles), multiplied by the number of
+       :doc:`SIMDs <compute-unit>` on the accelerator. A measure of the
+       total possible active cycles the SIMDs could be doing work, useful for
+       the normalization of metrics inside the CU.
+
+     - Cycles
+
+   * - Total L2 cycles
+
+     - The number of cycles the accelerator was active doing *any* work (that
+       is, kernel cycles), multiplied by the number of :doc:`L2 <l2-cache>`
+       channels on the accelerator. A measure of the total possible active
+       cycles the L2 channels could be doing work, useful for the normalization
+       of metrics inside the L2.
+
+     - Cycles
+
+   * - Total active L2 cycles
+
+     - The number of cycles a channel of the L2 cache was active doing *any*
+       work, summed over all :doc:`L2 <l2-cache>` channels on the accelerator.
+
+     - Cycles
+
+   * - Total sL1D cycles
+
+     - The number of cycles the accelerator was active doing *any* work (that
+       is, kernel cycles), multiplied by the number of
+       :ref:`scalar L1 data caches <desc-sl1d>` on the accelerator. A measure of
+       the total possible active cycles the sL1Ds could be doing work, useful
+       for the normalization of metrics inside the sL1D.
+
+     - Cycles
+
+   * - Total L1I cycles
+
+     - The number of cycles the accelerator was active doing *any* work (that
+       is, kernel cycles), multiplied by the number of
+       :ref:`L1 instruction caches <desc-l1i>` (L1I) on the accelerator. A
+       measure of the total possible active cycles the L1Is could be doing
+       work, useful for the normalization of metrics inside the L1I.
+
+     - Cycles
+
+   * - Total scheduler-pipe cycles
+
+     - The number of cycles the accelerator was active doing *any* work (that
+       is, kernel cycles), multiplied by the number of
+       :doc:`scheduler pipes <command-processor>` on the accelerator. A measure
+       of the total possible active cycles the scheduler-pipes could be doing
+       work, useful for the normalization of metrics inside the
+       :ref:`workgroup manager <desc-spi>` and
+       :doc:`command processor <command-processor>`.
+
+     - Cycles
+
+   * - Total shader-engine cycles
+
+     - The total number of cycles the accelerator was active doing *any* work,
+       multiplied by the number of :doc:`shader engines <shader-engine>` on the
+       accelerator. A measure of the total possible active cycles the shader
+       engines could be doing work, useful for the normalization of
+       metrics inside the :ref:`workgroup manager <desc-spi>`.
+
+     - Cycles
+
+   * - Thread-requests
+
+     - The number of unique memory addresses accessed by a single memory
+       instruction. On AMD Instinct accelerators, this has a maximum of 64
+       (that is, the size of the :ref:`wavefront <desc-wavefront>`).
+
+     - Addresses
+
+   * - Work-item
+
+     - A single *thread*, or lane, of execution that executes in lockstep with
+       the rest of the work-items comprising a :ref:`wavefront <desc-wavefront>`
+       of execution.
+
+     - N/A
+
+   * - Wavefront
+
+     - A group of work-items, or threads, that execute in lockstep on the
+       :doc:`compute unit <compute-unit>`. On AMD Instinct accelerators, the
+       wavefront size is always 64 work-items.
+
+     - N/A
+
+   * - Workgroup
+
+     - A group of wavefronts that execute on the same
+       :doc:`compute unit <compute-unit>`, and can cooperatively execute and
+       share data via the use of synchronization primitives,
+       :doc:`LDS <local-data-share>`, atomics, and others.
+
+     - N/A
+
+   * - Divergence
+
+     - Divergence within a wavefront occurs when not all work-items are active
+       when executing an instruction, that is, due to non-uniform control flow
+       within a wavefront. Can reduce execution efficiency by causing,
+       for instance, the :ref:`VALU <desc-valu>` to need to execute both
+       branches of a conditional with different sets of work-items active.
+
+     - N/A
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/l2-cache.rst b/projects/rocprofiler-compute/docs/conceptual/l2-cache.rst
new file mode 100644
index 0000000000..2c4b44514d
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/l2-cache.rst
@@ -0,0 +1,776 @@
+.. meta::
+   :description: Omniperf performance model: L2 cache (TCC)
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, L2, cache, infinity fabric, metrics
+
+**************
+L2 cache (TCC)
+**************
+
+The L2 cache is the coherence point for current AMD Instinct™ MI-series GCN™
+GPUs and CDNA™ accelerators, and is shared by all :doc:`CUs <compute-unit>`
+on the device. Besides serving requests from the
+:doc:`vector L1 data caches <vector-l1-cache>`, the L2 cache also is responsible
+for servicing requests from the :ref:`L1 instruction caches <desc-l1i>`, the
+:ref:`scalar L1 data caches <desc-sL1D>` and the
+:doc:`command processor <command-processor>`. The L2 cache is composed of a
+number of distinct channels (32 on MI100 and :ref:`MI2XX <mixxx-note>` series CDNA
+accelerators at 256B address interleaving) which can largely operate
+independently. Mapping of incoming requests to a specific L2 channel is
+determined by a hashing mechanism that attempts to evenly distribute requests
+across the L2 channels. Requests that miss in the L2 cache are passed out to
+:ref:`Infinity Fabric™ <l2-fabric>` to be routed to the appropriate memory
+location.
+
+The L2 cache metrics reported by Omniperf are broken down into four
+categories:
+
+*  :ref:`L2 Speed-of-Light <l2-sol>`
+
+*  :ref:`L2 cache accesses <l2-cache-accesses>`
+
+*  :ref:`L2-Fabric transactions <l2-fabric>`
+
+*  :ref:`L2-Fabric stalls <l2-fabric-stalls>`
+
+.. _l2-sol:
+
+L2 Speed-of-Light
+=================
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section
+   is currently computed with the maximum achievable clock frequency, as
+   reported by ``rocminfo``, for an accelerator. This may not be realistic for
+   all workloads.
+
+The L2 cache’s speed-of-light table contains a few key metrics about the
+performance of the L2 cache, aggregated over all the L2 channels, as a
+comparison with the peak achievable values of those metrics:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Utilization
+
+     - The ratio of the
+       :ref:`number of cycles an L2 channel was active, summed over all L2 channels on the accelerator <total-active-l2-cycles>`
+       over the :ref:`total L2 cycles <total-l2-cycles>`.
+
+     - Percent
+
+   * - Bandwidth
+
+     - The number of bytes looked up in the L2 cache, as a percent of the peak
+       theoretical bandwidth achievable on the specific accelerator. The number
+       of bytes is calculated as the number of cache lines requested multiplied
+       by the cache line size. This value does not consider partial requests, so
+       for example, if only a single value is requested in a cache line, the data
+       movement will still be counted as a full cache line.
+
+     - Percent
+
+   * - Hit Rate
+
+     - The ratio of the number of L2 cache line requests that hit in the L2
+       cache over the total number of incoming cache line requests to the L2
+       cache.
+
+     - Percent
+
+   * - L2-Fabric Read BW
+
+     - The number of bytes read by the L2 over the
+       :ref:`Infinity Fabric interface <l2-fabric>` per unit time.
+
+     - GB/s
+
+   * - L2-Fabric Write and Atomic BW
+
+     - The number of bytes sent by the L2 over the
+       :ref:`Infinity Fabric interface <l2-fabric>` by write and atomic
+       operations per unit time.
+
+     - GB/s
+
+.. note::
+
+   The L2 cache on AMD Instinct MI CDNA accelerators uses a "hit-on-miss"
+   approach to reporting cache hits. That is, if while satisfying a miss,
+   another request comes in that would hit on the same pending cache line, the
+   subsequent request will be counted as a 'hit'. Therefore, it is also
+   important to consider the latency metric in the :ref:`L2-Fabric <l2-fabric>`
+   section when evaluating the L2 hit rate.
+
+.. _l2-cache-accesses:
+
+L2 cache accesses
+=================
+
+This section details the incoming requests to the L2 cache from the
+:doc:`vL1D <vector-l1-cache>` and other clients -- for instance, the
+:ref:`sL1D <desc-sL1D>` and :ref:`L1I <desc-l1i>` caches.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 13 70 17
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Bandwidth
+
+     - The number of bytes looked up in the L2 cache, per
+       :ref:`normalization unit <normalization-units>`.  The number of bytes is
+       calculated as the number of cache lines requested multiplied by the cache
+       line size. This value does not consider partial requests, so for example,
+       if only a single value is requested in a cache line, the data movement
+       will still be counted as a full cache line.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`.
+
+   * - Requests
+
+     - The total number of incoming requests to the L2 from all clients for all
+       request types, per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Read Requests
+
+     - The total number of read requests to the L2 from all clients.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Write Requests
+
+     - The total number of write requests to the L2 from all clients.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Atomic Requests
+
+     - The total number of atomic requests (with and without return) to the L2
+       from all clients.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Streaming Requests
+
+     - The total number of incoming requests to the L2 that are marked as
+       *streaming*. The exact meaning of this may differ depending on the
+       targeted accelerator, however on an :ref:`MI2XX <mixxx-note>` this
+       corresponds to
+       `non-temporal load or stores <https://clang.llvm.org/docs/LanguageExtensions.html#non-temporal-load-store-builtins>`_.
+       The L2 cache attempts to evict *streaming* requests before normal
+       requests when the L2 is at capacity.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Probe Requests
+
+     - The number of coherence probe requests made to the L2 cache from outside
+       the accelerator. On an :ref:`MI2XX <mixxx-note>`, probe requests may be
+       generated by, for example, writes to
+       :ref:`fine-grained device <memory-type>` memory or by writes to
+       :ref:`coarse-grained <memory-type>` device memory.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Hit Rate
+
+     - The ratio of the number of L2 cache line requests that hit in the L2
+       cache over the total number of incoming cache line requests to the L2
+       cache.
+
+     - Percent
+
+   * - Hits
+
+     - The total number of requests to the L2 from all clients that hit in the
+       cache. As noted in the :ref:`Speed-of-Light <l2-sol>` section, this
+       includes hit-on-miss requests.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Misses
+
+     - The total number of requests to the L2 from all clients that miss in the
+       cache. As noted in the :ref:`Speed-of-Light <l2-sol>` section, these do
+       not include hit-on-miss requests.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Writebacks
+
+     - The total number of L2 cache lines written back to memory for any reason.
+       Write-backs may occur due to user code (such as HIP kernel calls to
+       ``__threadfence_system`` or atomic built-ins) by the
+       :doc:`command processor <command-processor>`'s memory acquire/release
+       fences, or for other internal hardware reasons.
+
+     - Cache lines per :ref:`normalization unit <normalization-units>`
+
+   * - Writebacks (Internal)
+
+     - The total number of L2 cache lines written back to memory for internal
+       hardware reasons, per :ref:`normalization unit <normalization-units>`.
+
+     - Cache lines per :ref:`normalization unit <normalization-units>`.
+
+   * - Writebacks (vL1D Req)
+
+     - The total number of L2 cache lines written back to memory due to requests
+       initiated by the :doc:`vL1D cache <vector-l1-cache>`, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cache lines per :ref:`normalization unit <normalization-units>`.
+
+   * - Evictions (Normal)
+
+     - The total number of L2 cache lines evicted from the cache due to capacity
+       limits, per :ref:`normalization unit <normalization-units>`.
+
+     - Cache lines per :ref:`normalization unit <normalization-units>`.
+
+   * - Evictions (vL1D Req)
+
+     - The total number of L2 cache lines evicted from the cache due to
+       invalidation requests initiated by the
+       :doc:`vL1D cache <vector-l1-cache>`, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cache lines per :ref:`normalization unit <normalization-units>`.
+
+   * - Non-hardware-Coherent Requests
+
+     - The total number of requests to the L2 to Non-hardware-Coherent (NC)
+       memory allocations, per :ref:`normalization unit <normalization-units>`.
+       See the :ref:`memory-type` for more information.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Uncached Requests
+
+     - The total number of requests to the L2 that go to Uncached (UC) memory
+       allocations. See the :ref:`memory-type` for more information.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Coherently Cached Requests
+
+     - The total number of requests to the L2 that go to Coherently Cacheable (CC)
+       memory allocations. See the :ref:`memory-type` for more information.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Read/Write Coherent Requests
+
+     - The total number of requests to the L2 that go to Read-Write coherent memory
+       (RW) allocations. See the :ref:`memory-type` for more information.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+.. note::
+
+   All requests to the L2 are for a single cache line's worth of data. The size
+   of a cache line may vary depending on the accelerator, however on an AMD
+   Instinct CDNA2 :ref:`MI2XX <mixxx-note>` accelerator, it is 128B, while on
+   an MI100, it is 64B.
+
+.. _l2-fabric:
+
+L2-Fabric transactions
+======================
+
+Requests/data that miss in the L2 must be routed to memory in order to
+service them. The backing memory for a request may be local to this
+accelerator (i.e., in the local high-bandwidth memory), in a remote
+accelerator’s memory, or even in the CPU’s memory. Infinity Fabric
+is responsible for routing these memory requests/data to the correct
+location and returning any fetched data to the L2 cache. The
+:ref:`l2-request-flow` describes the flow of these requests through
+Infinity Fabric in more detail, as described by Omniperf metrics,
+while :ref:`l2-request-metrics` give detailed definitions of
+individual metrics.
+
+.. _l2-request-flow:
+
+Request flow
+------------
+
+The following is a diagram that illustrates how L2↔Fabric requests are reported
+by Omniperf:
+
+.. figure:: ../data/performance-model/fabric.png
+   :align: center
+   :alt: L2-Fabric transaction flow on AMD Instinct MI-series accelerators
+   :width: 800
+
+   L2↔Fabric transaction flow on AMD Instinct MI-series accelerators.
+
+
+Requests from the L2 Cache are broken down into two major categories, read
+requests and write requests (at this granularity, atomic requests are treated
+as writes).
+
+From there, these requests can additionally be subdivided in a number of ways.
+First, these requests may be sent across Infinity Fabric as different
+transaction sizes, 32B or 64B on current CDNA accelerators.
+
+.. note::
+
+   On current CDNA accelerators, the 32B read request path is expected to be
+   unused and so is disconnected in the flow diagram.
+
+In addition, the read and write requests can be further categorized as:
+
+* Uncached read/write requests, for instance: for access to
+  :ref:`fine-grained memory <memory-type>`
+
+* Atomic requests, for instance: for atomic updates to
+  :ref:`fine-grained memory <memory-type>`
+
+* HBM read/write requests OR remote read/write requests, for instance: for
+  requests to the accelerator’s local HBM OR requests to a remote accelerator’s
+  HBM or the CPU’s DRAM
+
+These classifications are not necessarily *exclusive*. For example, a
+write request can be classified as an atomic request to the
+accelerator’s local HBM, and an uncached write request. The request-flow
+diagram marks *exclusive* classifications as a splitting of the flow,
+while *non-exclusive* requests do not split the flow line. For example,
+a request is either a 32B Write Request OR a 64B Write request, as the
+flow splits at this point:
+
+.. figure:: ../data/performance-model/split.*
+   :align: center
+   :alt: Splitting request flow
+   :width: 800
+
+   Splitting request flow
+
+However, continuing along, the same request might be an atomic request and an
+uncached write request, as reflected by a non-split flow:
+
+.. figure:: ../data/performance-model/nosplit.*
+   :align: center
+   :alt: Non-splitting request flow
+   :width: 800
+
+   Non-splitting request flow
+
+Finally, we note that :ref:`uncached <memory-type>` read requests (e.g., to
+:ref:`fine-grained memory <memory-type>`) are handled specially on CDNA
+accelerators, as indicated in the request flow diagram. These are
+expected to be counted as a 64B Read Request, and *if* they are requests
+to uncached memory (denoted by the dashed line), they will also be
+counted as *two* uncached read requests (that is, the request is split):
+
+.. figure:: ../data/performance-model/uncached.*
+   :align: center
+   :alt: Uncached read-request splitting
+   :width: 800
+
+   Uncached read-request splitting.
+
+.. _l2-request-metrics:
+
+Metrics
+-------
+
+The following metrics are reported for the L2-Fabric interface:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - L2-Fabric Read Bandwidth
+
+     - The total number of bytes read by the L2 cache from Infinity Fabric per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`.
+
+   * - HBM Read Traffic
+
+     - The percent of read requests generated by the L2 cache that are routed to
+       the accelerator's local high-bandwidth memory (HBM). This breakdown does
+       not consider the *size* of the request (meaning that 32B and 64B requests
+       are both counted as a single request), so this metric only *approximates*
+       the percent of the L2-Fabric Read bandwidth directed to the local HBM.
+
+     - Percent
+
+   * - Remote Read Traffic
+
+     - The percent of read requests generated by the L2 cache that are routed to
+       any memory location other than the accelerator's local high-bandwidth
+       memory (HBM) -- for example, the CPU's DRAM or a remote accelerator's
+       HBM. This breakdown does not consider the *size* of the request (meaning
+       that 32B and 64B requests are both counted as a single request), so this
+       metric only *approximates* the percent of the L2-Fabric Read bandwidth
+       directed to a remote location.
+
+     - Percent
+
+   * - Uncached Read Traffic
+
+     - The percent of read requests generated by the L2 cache that are reading
+       from an :ref:`uncached memory allocation <memory-type>`. Note, as
+       described in the :ref:`request flow <l2-request-flow>` section, a single
+       64B read request is typically counted as two uncached read requests. So,
+       it is possible for the Uncached Read Traffic to reach up to 200% of the
+       total number of read requests. This breakdown does not consider the
+       *size* of the request (i.e., 32B and 64B requests are both counted as a
+       single request), so this metric only *approximates* the percent of the
+       L2-Fabric read bandwidth directed to an uncached memory location.
+
+     - Percent
+
+   * - L2-Fabric Write and Atomic Bandwidth
+
+     - The total number of bytes written by the L2 over Infinity Fabric by write
+       and atomic operations per
+       :ref:`normalization unit <normalization-units>`. Note that on current
+       CDNA accelerators, such as the :ref:`MI2XX <mixxx-note>`, requests are
+       only considered *atomic* by Infinity Fabric if they are targeted at
+       non-write-cacheable memory, for example,
+       :ref:`fine-grained memory <memory-type>` allocations or
+       :ref:`uncached memory <memory-type>` allocations on the
+       MI2XX.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`.
+
+   * - HBM Write and Atomic Traffic
+
+     - The percent of write and atomic requests generated by the L2 cache that
+       are routed to the accelerator's local high-bandwidth memory (HBM). This
+       breakdown does not consider the *size* of the request (meaning that 32B
+       and 64B requests are both counted as a single request), so this metric
+       only *approximates* the percent of the L2-Fabric Write and Atomic
+       bandwidth directed to the local HBM. Note that on current CDNA
+       accelerators, such as the :ref:`MI2XX <mixxx-note>`, requests are only
+       considered *atomic* by Infinity Fabric if they are targeted at
+       :ref:`fine-grained memory <memory-type>` allocations or
+       :ref:`uncached memory <memory-type>` allocations.
+
+     - Percent
+
+   * - Remote Write and Atomic Traffic
+
+     - The percent of write and atomic requests generated by the L2 cache that
+       are routed to any memory location other than the accelerator's local
+       high-bandwidth memory (HBM) -- for example, the CPU's DRAM or a remote
+       accelerator's HBM. This breakdown does not consider the *size* of the
+       request (meaning that 32B and 64B requests are both counted as a single
+       request), so this metric only *approximates* the percent of the
+       L2-Fabric Write and Atomic bandwidth directed to a remote location. Note
+       that on current CDNA accelerators, such as the :ref:`MI2XX <mixxx-note>`,
+       requests are only considered *atomic* by Infinity Fabric if they are
+       targeted at :ref:`fine-grained memory <memory-type>` allocations or
+       :ref:`uncached memory <memory-type>` allocations.
+
+     - Percent
+
+   * - Atomic Traffic
+
+     - The percent of write requests generated by the L2 cache that are atomic
+       requests to *any* memory location. This breakdown does not consider the
+       *size* of the request (meaning that 32B and 64B requests are both counted
+       as a single request), so this metric only *approximates* the percent of
+       the L2-Fabric Write and Atomic bandwidth used by atomic requests. Note
+       that on current CDNA accelerators, such as the :ref:`MI2XX <mixxx-note>`,
+       requests are only considered *atomic* by Infinity Fabric if they are
+       targeted at :ref:`fine-grained memory <memory-type>` allocations or
+       :ref:`uncached memory <memory-type>` allocations.
+
+     - Percent
+
+   * - Uncached Write and Atomic Traffic
+
+     - The percent of write and atomic requests generated by the L2 cache that
+       are targeting :ref:`uncached memory allocations <memory-type>`. This
+       breakdown does not consider the *size* of the request (meaning that 32B
+       and 64B requests are both counted as a single request), so this metric
+       only *approximates* the percent of the L2-Fabric Write and Atomic
+       bandwidth directed to uncached memory allocations.
+
+     - Percent
+
+   * - Read Latency
+
+     - The time-averaged number of cycles read requests spent in Infinity Fabric
+       before data was returned to the L2.
+
+     - Cycles
+
+   * - Write Latency
+
+     - The time-averaged number of cycles write requests spent in Infinity
+       Fabric before a completion acknowledgement was returned to the L2.
+
+     - Cycles
+
+   * - Atomic Latency
+
+     - The time-averaged number of cycles atomic requests spent in Infinity
+       Fabric before a completion acknowledgement (atomic without return value)
+       or data (atomic with return value) was returned to the L2.
+
+     - Cycles
+
+   * - Read Stall
+
+     - The ratio of the total number of cycles the L2-Fabric interface was
+       stalled on a read request to any destination (local HBM, remote PCIe®
+       connected accelerator or CPU, or remote Infinity Fabric connected
+       accelerator [#inf]_ or CPU) over the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Write Stall
+
+     - The ratio of the total number of cycles the L2-Fabric interface was
+       stalled on a write or atomic request to any destination (local HBM,
+       remote PCIe connected accelerator or CPU, or remote Infinity Fabric
+       connected accelerator [#inf]_ or CPU) over the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+.. _l2-detailed-metrics:
+
+Detailed transaction metrics
+----------------------------
+
+The following metrics are available in the detailed L2-Fabric
+transaction breakdown table:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - 32B Read Requests
+
+     - The total number of L2 requests to Infinity Fabric to read 32B of data
+       from any memory location, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail. Typically unused on CDNA
+       accelerators.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Uncached Read Requests
+
+     - The total number of L2 requests to Infinity Fabric to read
+       :ref:`uncached data <memory-type>` from any memory location, per
+       :ref:`normalization unit <normalization-units>`. 64B requests for
+       uncached data are counted as two 32B uncached data requests. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - 64B Read Requests
+
+     - The total number of L2 requests to Infinity Fabric to read 64B of data
+       from any memory location, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - HBM Read Requests
+
+     - The total number of L2 requests to Infinity Fabric to read 32B or 64B of
+       data from the accelerator's local HBM, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Remote Read Requests
+
+     - The total number of L2 requests to Infinity Fabric to read 32B or 64B of
+       data from any source other than the accelerator's local HBM, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - 32B Write and Atomic Requests
+
+     - The total number of L2 requests to Infinity Fabric to write or atomically
+       update 32B of data to any memory location, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Uncached Write and Atomic Requests
+
+     - The total number of L2 requests to Infinity Fabric to write or atomically
+       update 32B or 64B of :ref:`uncached data <memory-type>`, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - 64B Write and Atomic Requests
+
+     - The total number of L2 requests to Infinity Fabric to write or atomically
+       update 64B of data in any memory location, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - HBM Write and Atomic Requests
+
+     - The total number of L2 requests to Infinity Fabric to write or atomically
+       update 32B or 64B of data in the accelerator's local HBM, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Remote Write and Atomic Requests
+
+     - The total number of L2 requests to Infinity Fabric to write or atomically
+       update 32B or 64B of data in any memory location other than the
+       accelerator's local HBM, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+   * - Atomic Requests
+
+     - The total number of L2 requests to Infinity Fabric to atomically update
+       32B or 64B of data in any memory location, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`l2-request-flow` for more detail. Note that on current CDNA
+       accelerators, such as the :ref:`MI2XX <mixxx-note>`, requests are only
+       considered *atomic* by Infinity Fabric if they are targeted at
+       non-write-cacheable memory, such as
+       :ref:`fine-grained memory <memory-type>` allocations or
+       :ref:`uncached memory <memory-type>` allocations on the MI2XX.
+
+     - Requests per :ref:`normalization unit <normalization-units>`.
+
+.. _l2-fabric-stalls:
+
+L2-Fabric interface stalls
+==========================
+
+When the interface between the L2 cache and Infinity Fabric becomes backed up by
+requests, it may stall, preventing the L2 from issuing additional requests to
+Infinity Fabric until prior requests complete. This section gives a breakdown of
+what types of requests in a kernel caused a stall (like read versus write), and
+to which locations -- for instance, to the accelerator’s local memory, or to
+remote accelerators or CPUs.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Read - PCIe Stall
+
+     - The number of cycles the L2-Fabric interface was stalled on read requests
+       to remote PCIe connected accelerators [#inf]_ or CPUs as a percent of the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Read - Infinity Fabric Stall
+
+     - The number of cycles the L2-Fabric interface was stalled on read requests
+       to remote Infinity Fabric connected accelerators [#inf]_ or CPUs as a
+       percent of the :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Read - HBM Stall
+
+     - The number of cycles the L2-Fabric interface was stalled on read requests
+       to the accelerator's local HBM as a percent of the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Write - PCIe Stall
+
+     - The number of cycles the L2-Fabric interface was stalled on write or
+       atomic requests to remote PCIe connected accelerators [#inf]_ or CPUs as
+       a percent of the :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Write - Infinity Fabric Stall
+
+     - The number of cycles the L2-Fabric interface was stalled on write or
+       atomic requests to remote Infinity Fabric connected accelerators [#inf]_
+       or CPUs as a percent of the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Write - HBM Stall
+
+     - The number of cycles the L2-Fabric interface was stalled on write or
+       atomic requests to the accelerator's local HBM as a percent of the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+   * - Write - Credit Starvation
+
+     - The number of cycles the L2-Fabric interface was stalled on write or
+       atomic requests to any memory location because too many write/atomic
+       requests were currently in flight, as a percent of the
+       :ref:`total active L2 cycles <total-active-l2-cycles>`.
+
+     - Percent
+
+.. warning::
+
+   On current CDNA accelerators and GCN GPUs, these L2↔Fabric stalls can be undercounted in some circumstances.
+
+.. rubric:: Footnotes
+
+.. [#inf] In addition to being used for on-accelerator data-traffic, AMD
+   `Infinity Fabric <https://www.amd.com/en/technologies/infinity-architecture>`_
+   technology can be used to connect multiple accelerators to achieve advanced
+   peer-to-peer connectivity and enhanced bandwidths over traditional PCIe
+   connections. Some AMD Instinct MI-series accelerators like the MI250X
+   `feature coherent CPU↔accelerator connections built using AMD Infinity Fabric <https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf>`_.
+
+.. rubric:: Disclaimer
+
+PCIe® is a registered trademark of PCI-SIG Corporation.
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/local-data-share.rst b/projects/rocprofiler-compute/docs/conceptual/local-data-share.rst
new file mode 100644
index 0000000000..c596844dce
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/local-data-share.rst
@@ -0,0 +1,183 @@
+.. meta::
+   :description: Omniperf performance model: Local data share (LDS)
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, local, data, share, LDS
+
+**********************
+Local data share (LDS)
+**********************
+
+.. _lds-sol:
+
+LDS Speed-of-Light
+==================
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section is
+   currently computed with the maximum achievable clock frequency, as reported
+   by ``rocminfo``, for an accelerator. This may not be realistic for all
+   workloads.
+
+The :ref:`LDS <desc-lds>` speed-of-light chart shows a number of key metrics for
+the LDS as a comparison with the peak achievable values of those metrics.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Utilization
+
+     - Indicates what percent of the kernel's duration the :ref:`LDS <desc-lds>`
+       was actively executing instructions (including, but not limited to, load,
+       store, atomic and HIP's ``__shfl`` operations).  Calculated as the ratio
+       of the total number of cycles LDS was active over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - Access Rate
+
+     - Indicates the percentage of SIMDs in the :ref:`VALU <desc-valu>` [#lds-workload]_
+       actively issuing LDS instructions, averaged over the lifetime of the
+       kernel. Calculated as the ratio of the total number of cycles spent by
+       the :ref:`scheduler <desc-scheduler>` issuing :ref:`LDS <desc-lds>`
+       instructions over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - Theoretical Bandwidth (% of Peak)
+
+     - Indicates the maximum amount of bytes that *could* have been loaded from,
+       stored to, or atomically updated in the LDS in this kernel, as a percent
+       of the peak LDS bandwidth achievable. See the
+       :ref:`LDS bandwidth example <lds-bandwidth>` for more detail.
+
+     - Percent
+
+   * - Bank Conflict Rate
+
+     - Indicates the percentage of active LDS cycles that were spent servicing
+       bank conflicts. Calculated as the ratio of LDS cycles spent servicing
+       bank conflicts over the number of LDS cycles that would have been
+       required to move the same amount of data in an uncontended access. [#lds-bank-conflict]_
+
+     - Percent
+
+.. rubric:: Footnotes
+
+.. [#lds-workload] Here we assume the typical case where the workload evenly distributes
+   LDS operations over all SIMDs in a CU (that is, waves on different SIMDs are
+   executing similar code). For highly unbalanced workloads, where e.g., one
+   SIMD pair in the CU does not issue LDS instructions at all, this metric is
+   better interpreted as the percentage of SIMDs issuing LDS instructions on
+   :ref:`SIMD pairs <desc-lds>` that are actively using the LDS, averaged over
+   the lifetime of the kernel.
+
+.. [#lds-bank-conflict] The maximum value of the bank conflict rate is less than 100%
+   (specifically: 96.875%), as the first cycle in the
+   :ref:`LDS scheduler <desc-lds>` is never considered contended.
+
+.. _lds-stats:
+
+Statistics
+==========
+
+The LDS statistics panel gives a more detailed view of the hardware:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - LDS Instructions
+
+     - The total number of LDS instructions (including, but not limited to,
+       read/write/atomics and HIP's ``__shfl`` instructions) executed per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Theoretical Bandwidth
+
+     - Indicates the maximum amount of bytes that could have been loaded from,
+       stored to, or atomically updated in the LDS per
+       :ref:`normalization unit <normalization-units>`. Does *not* take into
+       account the execution mask of the wavefront when the instruction was
+       executed. See the
+       :ref:`LDS bandwidth example <lds-bandwidth>` for more detail.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`
+
+   * - LDS Latency
+
+     - The average number of round-trip cycles (i.e., from issue to data-return
+       / acknowledgment) required for an LDS instruction to complete.
+
+     - Cycles
+
+   * - Bank Conflicts/Access
+
+     - The ratio of the number of cycles spent in the
+       :ref:`LDS scheduler <desc-lds>` due to bank conflicts (as determined by
+       the conflict resolution hardware) to the base number of cycles that would
+       be spent in the LDS scheduler in a completely uncontended case. This is
+       the unnormalized form of the Bank Conflict Rate.
+
+     - Conflicts/Access
+
+   * - Index Accesses
+
+     - The total number of cycles spent in the :ref:`LDS scheduler <desc-lds>`
+       over all operations per :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Atomic Return Cycles
+
+     - The total number of cycles spent on LDS atomics with return per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Bank Conflicts
+
+     - The total number of cycles spent in the :ref:`LDS scheduler <desc-lds>`
+       due to bank conflicts (as determined by the conflict resolution hardware)
+       per :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Address Conflicts
+
+     - The total number of cycles spent in the :ref:`LDS scheduler <desc-lds>`
+       due to address conflicts (as determined by the conflict resolution
+       hardware) per :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Unaligned Stall
+
+     - The total number of cycles spent in the :ref:`LDS scheduler <desc-lds>`
+       due to stalls from non-dword aligned addresses per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Memory Violations
+
+     - The total number of out-of-bounds accesses made to the LDS, per
+       :ref:`normalization unit <normalization-units>`. This is unused and
+       expected to be zero in most configurations for modern CDNA™ accelerators.
+
+     - Accesses per :ref:`normalization unit <normalization-units>`
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/performance-model.rst b/projects/rocprofiler-compute/docs/conceptual/performance-model.rst
new file mode 100644
index 0000000000..1a94b3ed69
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/performance-model.rst
@@ -0,0 +1,49 @@
+.. meta::
+   :description: Omniperf performance model
+   :keywords: Omniperf, ROCm, performance, model, profiler, tool, Instinct,
+              accelerator, AMD
+
+*****************
+Performance model
+*****************
+
+Omniperf makes available an extensive list of metrics to better understand
+achieved application performance on AMD Instinct™ MI-series accelerators
+including Graphics Core Next™ (GCN) GPUs like the AMD Instinct MI50, CDNA™
+accelerators like the MI100, and CDNA2 accelerators such as the MI250X, MI250,
+and MI210.
+
+To best use profiling data, it's important to understand the role of various
+hardware blocks of AMD Instinct accelerators. This section describes each
+hardware block on the accelerator as interacted with by a software developer to
+give a deeper understanding of the metrics reported by profiling data. Refer to
+:doc:`/tutorial/profiling-by-example` for more practical examples and details on how
+to use Omniperf to optimize your code.
+
+.. _mixxx-note:
+
+.. note::
+
+   In this chapter, **MI2XX** refers to any of the CDNA2 architecture-based AMD
+   Instinct MI250X, MI250, and MI210 accelerators interchangeably in cases
+   where the exact product at hand is not relevant.
+
+   For a comparison of AMD Instinct accelerator specifications, refer to
+   :doc:`Hardware specifications <rocm:reference/gpu-arch-specs>`. For product
+   details, see the :prod-page:`MI250X <mi200/mi250x>`,
+   :prod-page:`MI250 <mi200/mi250>`, and :prod-page:`MI210 <mi200/mi210>`
+   product pages.
+
+In this chapter, the AMD Instinct performance model used by Omniperf is divided into a handful of
+key hardware blocks, each detailed in the following sections:
+
+* :doc:`compute-unit`
+
+* :doc:`l2-cache`
+
+* :doc:`shader-engine`
+
+* :doc:`command-processor`
+
+* :doc:`system-speed-of-light`
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/pipeline-descriptions.rst b/projects/rocprofiler-compute/docs/conceptual/pipeline-descriptions.rst
new file mode 100644
index 0000000000..9261421eb6
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/pipeline-descriptions.rst
@@ -0,0 +1,299 @@
+.. meta::
+   :description: Omniperf performance model: Pipeline descriptions
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, pipeline, VALU, SALU, VMEM, SMEM, LDS, branch,
+              scheduler, MFMA, AGPRs
+
+*********************
+Pipeline descriptions
+*********************
+
+This section details the various execution pipelines of the
+:doc:`compute unit <compute-unit>`.
+
+.. _desc-valu:
+
+.. _desc-vmem:
+
+Vector arithmetic logic unit (VALU)
+-----------------------------------
+
+The vector arithmetic logic unit (VALU) executes vector instructions
+over an entire wavefront, each :ref:`work-item <desc-work-item>` (or,
+vector-lane) potentially operating on distinct data. The VALU of a CDNA™
+accelerator or GCN™ GPU typically consists of:
+
+*  Four 16-wide SIMD processors (see :hip-training-pdf:`24` for more details).
+
+*  Four 64 or 128 KiB VGPR files (yielding a total of 256-512 KiB
+   per CU), see :ref:`AGPRs <desc-agprs>` for more detail.
+
+*  An instruction buffer (per-SIMD) that contains execution slots for up
+   to 8 wavefronts (for 32 total wavefront slots on each CU).
+
+*  A vector memory (VMEM) unit which transfers data between VGPRs and
+   memory; each work-item supplies its own memory address and supplies
+   or receives unique data.
+
+*  CDNA accelerators, such as the MI100 and :ref:`MI2XX <mixxx-note>`, contain
+   additional
+   :amd-lab-note:`Matrix Fused Multiply-Add (MFMA) <amd-lab-notes-matrix-cores-readme>`
+   units.
+
+To support branching and conditionals, each wavefront in the VALU
+has a distinct execution mask which determines which work-items in the
+wavefront are active for the currently executing instruction. When
+executing a VALU instruction, inactive work-items (according to the
+current execution mask of the wavefront) do not execute the instruction
+and are treated as no-ops.
+
+.. note::
+
+   On GCN GPUs and the CDNA MI100 accelerator, there are slots for up to 10
+   wavefronts in the instruction buffer, but generally occupancy is limited by
+   other factors to 32 waves per :doc:`compute unit <compute-unit>`.
+   On the CDNA2 :ref:`MI2XX <mixxx-note>` series accelerators, there are only 8
+   waveslots per-SIMD.
+
+.. _desc-salu:
+
+.. _desc-smem:
+
+Scalar arithmetic logic unit (SALU)
+-----------------------------------
+
+The scalar arithmetic logic unit (SALU) executes instructions that are
+shared between all work-items in a wavefront. This includes control flow
+such as if/else conditionals, branches and looping, pointer arithmetic, loading
+common values, and more.
+
+The SALU consists of:
+
+*  A scalar processor capable of various arithmetic, conditional, and
+   comparison (etc.) operations. See
+   :mi200-isa-pdf:`Chapter 5. Scalar ALU Operations <35>`
+   of the CDNA2 Instruction Set Architecture (ISA) Reference Guide for more
+   detail.
+
+*  A 12.5 KiB Scalar General Purpose Register (SGPR) file
+
+*  A scalar memory (SMEM) unit which transfers data between SGPRs and
+   memory
+
+Data loaded by the SMEM can be cached in the :ref:`scalar L1 data cache <desc-sl1d>`,
+and is typically only used for read-only, uniform accesses such as kernel
+arguments, or HIP’s ``__constant__`` memory.
+
+.. _desc-lds:
+
+Local data share (LDS)
+----------------------
+
+The local data share (LDS, a.k.a., "shared memory") is a fast on-CU scratchpad
+that can be explicitly managed by software to effectively share data and to
+coordinate between wavefronts in a workgroup.
+
+.. figure:: ../data/performance-model/lds.*
+   :align: center
+   :alt: Performance model of the local data share (LDS) on AMD Instinct
+         accelerators
+   :width: 800
+
+   Performance model of the local data share (LDS) on AMD Instinct MI-series
+   accelerators.
+
+Above is Omniperf's performance model of the LDS on CDNA accelerators (adapted
+from  :mantor-gcn-pdf:`20`). The SIMDs in the :ref:`VALU <desc-valu>` are
+connected to the LDS in pairs (see above). Only one SIMD per pair may issue an
+LDS instruction at a time, but both pairs may issue concurrently.
+
+On CDNA accelerators, the LDS contains 32 banks and each bank is 4B wide.
+The LDS is designed such that each bank can be read from, written to, or
+atomically updated every cycle, for a total throughput of 128B/clock
+(:gcn-crash-course:`40`).
+
+On each of the two ports to the SIMDs, 64B can be sent in each direction per
+cycle. So, a single wavefront, coming from one of the 2 SIMDs in a pair, can
+only get back 64B/cycle (16 lanes per cycle). The input port is shared between
+data and address and this can affect achieved bandwidth for different data
+sizes. For example, a 64-wide store where each lane is sending a 4B value takes
+8 cycles (50% peak bandwidth) while a 64-wide store where each lane is sending
+a 16B value takes 20 cycles (80% peak bandwidth).
+
+In addition, the LDS contains conflict-resolution hardware to detect and handle
+bank conflicts. A bank conflict occurs when two (or more)
+:ref:`work-items <desc-work-item>` in a :ref:`wavefront <desc-wavefront>` want
+to read, write, or atomically update different addresses that map to the same
+bank in the same cycle. In this case, the conflict detection hardware will
+determine a new schedule such that the access is split into multiple cycles with
+no conflicts in any single cycle.
+
+When multiple work-items want to read from the same address within a bank, the
+result can be efficiently broadcasted (:gcn-crash-course:`41`). Multiple
+work-items writing to the same address within a bank typically results in undefined
+behavior in HIP and other high-level languages, as the LDS will write the value from the
+last work-item as determined by the hardware scheduler (:gcn-crash-course:`41`).
+This behavior may be useful in the very specific case of storing a uniform
+value.
+
+Relatedly, an address conflict is defined as occurring when two (or more)
+work-items in a wavefront want to atomically update the same address on the same
+cycle. As in a bank-conflict, this may cause additional cycles of work for the
+LDS operation to complete.
+
+.. _desc-branch:
+
+Branch
+------
+
+The branch unit is responsible for executing jumps and branches to execute
+control flow operations.
+Note that Branch operations are not used for execution mask updates, but only
+for “whole wavefront” control-flow changes.
+
+.. _desc-scheduler:
+
+Scheduler
+---------
+
+The scheduler is responsible for arbitration and issue of instructions for all
+the wavefronts currently executing on the :doc:`CU <compute-unit>`. On every
+clock cycle, the scheduler:
+
+* Considers waves from one of the SIMD units for execution, selected in a
+  round-robin fashion between the SIMDs in the compute unit
+
+* Issues up to one instruction per wavefront on the selected SIMD
+
+* Issues up to one instruction per each of the instruction categories among the waves on the selected SIMD:
+
+  * :ref:`VALU <desc-valu>`
+
+  * :ref:`VMEM <desc-vmem>` operations
+
+  * :ref:`SALU <desc-salu>` / SMEM operations
+
+  * :ref:`LDS <desc-lds>`
+
+  * :ref:`Branch <desc-branch>` operations
+
+This gives a maximum of five issued Instructions Per Cycle (IPC), per-SIMD,
+per-CU (:hip-training-pdf:`Introduction to AMD GPU Programming with HIP <>`,
+:gcn-crash-course:`The AMD GCN Architecture - A Crash Course <>`). On CDNA
+accelerators with :ref:`MFMA <desc-mfma>` instructions, these are issued via the
+:ref:`VALU <desc-valu>`. Some of them will execute on a separate functional unit
+and typically allow other :ref:`VALU <desc-valu>` operations to execute in their
+shadow (see the :ref:`MFMA <desc-mfma>` section for more detail).
+
+.. note::
+
+   The IPC model used by Omniperf omits the following two complications for
+   clarity. First, CDNA accelerators contain other execution units on the CU
+   that are unused for compute applications. Second, so-called "internal"
+   instructions (see :gcn-crash-course:`29`) are not issued to a functional
+   unit, and can technically cause the maximum IPC to *exceed* 5 instructions
+   per-cycle in special (largely unrealistic) cases. The latter issue is
+   discussed in more detail in the
+   :ref:`'internal' IPC <ipc-internal-instructions>` example.
+
+.. _desc-mfma:
+
+Matrix fused multiply-add (MFMA)
+--------------------------------
+
+CDNA accelerators, such as the MI100 and :ref:`MI2XX <mixxx-note>`, contain
+specialized hardware to accelerate matrix-matrix multiplications, also
+known as Matrix Fused Multiply-Add (MFMA) operations. The exact
+operation types and supported formats may vary by accelerator. Refer to the
+:amd-lab-note:`AMD matrix cores <amd-lab-notes-matrix-cores-readme>`
+blog post on GPUOpen for a general discussion of these hardware units.
+In addition, to explore the available MFMA instructions in-depth on
+various AMD accelerators (including the CDNA line), we recommend the
+`AMD Matrix Instruction Calculator <https://github.com/ROCm/amd_matrix_instruction_calculator>`_:
+
+.. code-block:: shell
+   :caption: Partial snapshot of the AMD Matrix Instruction Calculator Tool
+
+    $ ./matrix_calculator.py --architecture cdna2 --instruction v_mfma_f32_4x4x1f32 --detail-instruction
+    Architecture: CDNA2
+    Instruction: V_MFMA_F32_4X4X1F32
+        Encoding: VOP3P-MAI
+        VOP3P Opcode: 0x42
+        VOP3P-MAI Opcode: 0x2
+        Matrix Dimensions:
+            M: 4
+            N: 4
+            K: 1
+            blocks: 16
+        Execution statistics:
+            FLOPs: 512
+            Execution cycles: 8
+            FLOPs/CU/cycle: 256
+            Can co-execute with VALU: True
+            VALU co-execution cycles possible: 4
+        Register usage:
+            GPRs required for A: 1
+            GPRs required for B: 1
+            GPRs required for C: 4
+            GPRs required for D: 4
+            GPR alignment requirement: 8 bytes
+
+For the purposes of Omniperf, the MFMA unit is typically treated as a separate
+pipeline from the :ref:`VALU <desc-valu>`, as other VALU instructions (along
+with other execution pipelines such as the :ref:`SALU <desc-salu>`) typically can be
+issued during a portion of the total duration of an MFMA operation.
+
+.. note::
+
+   The exact details of VALU and MFMA operation co-execution vary by
+   instruction, and can be explored in more detail via the following fields in
+   the
+   `AMD Matrix Instruction Calculator's detailed instruction information <https://github.com/ROCm/amd_matrix_instruction_calculator#example-of-querying-instruction-information>`_:
+
+   * ``Can co-execute with VALU``
+
+   * ``VALU co-execution cycles possible``
+
+
+Non-pipeline resources
+----------------------
+
+In this section, we describe a few resources that are not standalone
+pipelines but are important for understanding performance optimization
+on CDNA accelerators.
+
+.. _desc-barrier:
+
+Barrier
+^^^^^^^
+
+Barriers are resources on the compute-unit of a CDNA accelerator that
+are used to implement synchronization primitives (for example, HIP’s
+``__syncthreads``). Barriers are allocated to any workgroup that
+consists of more than a single wavefront.
+
+.. _desc-agprs:
+
+Accumulation vector general-purpose registers (AGPRs)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Accumulation vector general-purpose registers, or AGPRs, are special
+resources that are accessible to a subset of instructions focused on
+:ref:`MFMA <desc-mfma>` operations. These registers allow the MFMA
+unit to access more than the normal maximum of 256 architected
+:ref:`vector general-purpose registers (VGPRs) <desc-valu>` by having up to 256
+in the architected space and up to 256 in the accumulation space.
+Traditional VALU instructions can only use VGPRs in the architected
+space, and data can be moved to/from VGPRs↔AGPRs using specialized
+instructions (``v_accvgpr_*``). These data movement instructions may be
+used by the compiler to implement lower-cost register-spill/fills on
+architectures with AGPRs.
+
+AGPRs are not available on all AMD Instinct™ accelerators. GCN GPUs,
+such as the AMD Instinct MI50, had a 256 KiB VGPR file. The AMD
+Instinct MI100 (CDNA) has a 2x256 KiB register file, where one half
+is available as general-purpose VGPRs, and the other half is for matrix
+math accumulation VGPRs (AGPRs). The AMD Instinct :ref:`MI2XX <mixxx-note>`
+(CDNA2) has a 512 KiB VGPR file per CU, where each wave can dynamically request
+up to 256 KiB of VGPRs and an additional 256 KiB of AGPRs. For more information,
+refer to `this comment <https://github.com/ROCm/ROCm/issues/1689#issuecomment-1553751913>`_.
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/pipeline-metrics.rst b/projects/rocprofiler-compute/docs/conceptual/pipeline-metrics.rst
new file mode 100644
index 0000000000..f7bb4bcdae
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/pipeline-metrics.rst
@@ -0,0 +1,909 @@
+.. meta::
+   :description: Omniperf performance model: Pipeline metrics
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, pipeline, wavefront, metrics, launch, runtime,
+              VALU, MFMA, instruction mix, FLOPs, arithmetic, operations
+
+****************
+Pipeline metrics
+****************
+
+In this section, we describe the metrics available in Omniperf to analyze the
+pipelines discussed in the :doc:`pipeline-descriptions`.
+
+.. _wavefront:
+
+Wavefront
+=========
+
+.. _wavefront-launch-stats:
+
+Wavefront launch stats
+----------------------
+
+The wavefront launch stats panel gives general information about the
+kernel launch:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 65 15
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Grid Size
+
+     - The total number of work-items (or, threads) launched as a part of
+       the kernel dispatch.  In HIP, this is equivalent to the total grid size
+       multiplied by the total workgroup (or, block) size.
+
+     - :ref:`Work-items <desc-work-item>`
+
+   * - Workgroup Size
+
+     - The total number of work-items (or, threads) in each workgroup
+       (or, block) launched as part of the kernel dispatch.  In HIP, this is
+       equivalent to the total block size.
+
+     - :ref:`Work-items <desc-work-item>`
+
+   * - Total Wavefronts
+
+     - The total number of wavefronts launched as part of the kernel dispatch.
+       On AMD Instinct™ CDNA™ accelerators and GCN™ GPUs, the wavefront size is
+       always 64 work-items.  Thus, the total number of wavefronts should be
+       equivalent to the ceiling of grid size divided by 64.
+
+     - :ref:`Wavefronts <desc-wavefront>`
+
+   * - Saved Wavefronts
+
+     - The total number of wavefronts saved at a context-save. See
+       `cwsr_enable <https://docs.kernel.org/gpu/amdgpu/module-parameters.html?highlight=cwsr>`_.
+
+     - :ref:`Wavefronts <desc-wavefront>`
+
+   * - Restored Wavefronts
+
+     - The total number of wavefronts restored from a context-save. See
+       `cwsr_enable <https://docs.kernel.org/gpu/amdgpu/module-parameters.html?highlight=cwsr>`_.
+
+     - :ref:`Wavefronts <desc-wavefront>`
+
+   * - VGPRs
+
+     - The number of architected vector general-purpose registers allocated for
+       the kernel, see :ref:`VALU <desc-valu>`.  Note: this may not exactly
+       match the number of VGPRs requested by the compiler due to allocation
+       granularity.
+
+     - :ref:`VGPRs <desc-valu>`
+
+   * - AGPRs
+
+     - The number of accumulation vector general-purpose registers allocated for
+       the kernel, see :ref:`AGPRs <desc-agprs>`.  Note: this may not exactly
+       match the number of AGPRs requested by the compiler due to allocation
+       granularity.
+
+     - :ref:`AGPRs <desc-agprs>`
+
+   * - SGPRs
+
+     - The number of scalar general-purpose registers allocated for the kernel,
+       see :ref:`SALU <desc-salu>`.  Note: this may not exactly match the number
+       of SGPRs requested by the compiler due to allocation granularity.
+
+     - :ref:`SGPRs <desc-salu>`
+
+   * - LDS Allocation
+
+     - The number of bytes of :doc:`LDS <local-data-share>` memory (or, shared
+       memory) allocated for this kernel.  Note: This may also be larger than
+       what was requested at compile time due to both allocation granularity and
+       dynamic per-dispatch LDS allocations.
+
+     - Bytes per :ref:`workgroup <desc-workgroup>`
+
+   * - Scratch Allocation
+
+     - The number of bytes of :ref:`scratch memory <memory-spaces>` requested
+       per work-item for this kernel. Scratch memory is used for stack memory
+       on the accelerator, as well as for register spills and restores.
+
+     - Bytes per :ref:`work-item <desc-work-item>`
+
+.. _wavefront-runtime-stats:
+
+Wavefront runtime stats
+-----------------------
+
+The wavefront runtime statistics give a high-level overview of the
+execution of wavefronts in a kernel:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 18 65 17
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - :ref:`Kernel time <kernel-time>`
+
+     - The total duration of the executed kernel. Note: this should not be
+       directly compared to the wavefront cycles / timings below.
+
+     - Nanoseconds
+
+   * - :ref:`Kernel cycles <kernel-cycles>`
+
+     - The total duration of the executed kernel in cycles. Note: this should
+       not be directly compared to the wavefront cycles / timings below.
+
+     - Cycles
+
+   * - Instructions per wavefront
+
+     - The average number of instructions (of all types) executed per wavefront.
+       This is averaged over all wavefronts in a kernel dispatch.
+
+     - Instructions / wavefront
+
+   * - Wave cycles
+
+     - The number of cycles a wavefront in the kernel dispatch spent resident on
+       a compute unit per :ref:`normalization unit <normalization-units>`. This
+       is averaged over all wavefronts in a kernel dispatch.  Note: this should
+       not be directly compared to the kernel cycles above.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Dependency wait cycles
+
+     - The number of cycles a wavefront in the kernel dispatch stalled waiting
+       on memory of any kind (e.g., instruction fetch, vector or scalar memory,
+       etc.) per :ref:`normalization unit <normalization-units>`. This counter
+       is incremented at every cycle by *all* wavefronts on a CU stalled at a
+       memory operation.  As such, it is most useful to get a sense of how waves
+       were spending their time, rather than identification of a precise limiter
+       because another wave could be actively executing while a wave is stalled.
+       The sum of this metric, Issue Wait Cycles and Active Cycles should be
+       equal to the total Wave Cycles metric.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Issue Wait Cycles
+
+     - The number of cycles a wavefront in the kernel dispatch was unable to
+       issue an instruction for any reason (e.g., execution pipe back-pressure,
+       arbitration loss, etc.) per
+       :ref:`normalization unit <normalization-units>`.  This counter is
+       incremented at every cycle by *all* wavefronts on a CU unable to issue an
+       instruction.  As such, it is most useful to get a sense of how waves were
+       spending their time, rather than identification of a precise limiter
+       because another wave could be actively executing while a wave is issue
+       stalled.  The sum of this metric, Dependency Wait Cycles and Active
+       Cycles should be equal to the total Wave Cycles metric.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Active Cycles
+
+     - The average number of cycles a wavefront in the kernel dispatch was
+       actively executing instructions per
+       :ref:`normalization unit <normalization-units>`. This measurement is made
+       on a per-wavefront basis, and may include cycles that another wavefront
+       spent actively executing (on another execution unit, for example) or was
+       stalled.  As such, it is most useful to get a sense of how waves were
+       spending their time, rather than identification of a precise limiter. The
+       sum of this metric, Issue Wait Cycles and Dependency Wait Cycles should be
+       equal to the total Wave Cycles metric.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Wavefront Occupancy
+
+     - The time-averaged number of wavefronts resident on the accelerator over
+       the lifetime of the kernel. Note: this metric may be inaccurate for
+       short-running kernels (less than 1ms).
+
+     - :ref:`Wavefronts <desc-wavefront>`
+
+.. note::
+
+   As mentioned earlier, the measurement of kernel cycles and time typically
+   cannot be directly compared to, for example, wave cycles. This is due to two factors.
+   First, the kernel cycles/timings are measured using a counter that is
+   impacted by scheduling overhead; this is particularly noticeable for
+   "short-running" kernels (less than 1ms) where scheduling overhead forms a
+   significant portion of the overall kernel runtime. Second, the wave cycles
+   metric is incremented per-wavefront scheduled to a SIMD every cycle whereas
+   the kernel cycles counter is incremented only once per-cycle when *any*
+   wavefront is scheduled.
+
+.. _instruction-mix:
+
+Instruction mix
+===============
+
+The instruction mix panel shows a breakdown of the various types of instructions
+executed by the user’s kernel, and which pipelines on the
+:doc:`CU <compute-unit>` they were executed on. In addition, Omniperf reports
+further information about the breakdown of operation types for the
+:ref:`VALU <desc-valu>`, vector-memory, and :ref:`MFMA <desc-mfma>`
+instructions.
+
+.. note::
+
+   All metrics in this section count *instructions issued*, and *not* the total
+   number of operations executed. The values reported by these metrics will not
+   change regardless of the execution mask of the wavefront. Note that even if
+   the execution mask is identically zero (meaning that *no lanes are active*)
+   the instruction will still be counted, as CDNA accelerators still consider
+   these instructions *issued*. See
+   :mi200-isa-pdf:`EXECute Mask, section 3.3 of the CDNA2 ISA guide<19>` for
+   examples and further details.
+
+Overall instruction mix
+-----------------------
+
+This panel shows the total number of each type of instruction issued to
+the :doc:`various compute pipelines </conceptual/pipeline-descriptions>` on the
+:doc:`CU </conceptual/compute-unit>`. These are:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - :ref:`VALU <desc-valu>` instructions
+
+     - The total number of vector arithmetic logic unit (VALU) operations
+       issued. These are the workhorses of the
+       :doc:`compute unit <compute-unit>`, and are used to execute a wide range of
+       instruction types including floating point operations, non-uniform
+       address calculations, transcendental operations, integer operations,
+       shifts, conditional evaluation, etc.
+
+     - Instructions
+
+   * - VMEM instructions
+
+     - The total number of vector memory operations issued. These include most
+       loads, stores and atomic operations and all accesses to
+       :ref:`generic, global, private and texture <memory-spaces>` memory.
+
+     - Instructions
+
+   * - :doc:`LDS <local-data-share>` instructions
+
+     - The total number of LDS (also known as shared memory) operations issued.
+       These include loads, stores, atomics, and HIP's ``__shfl`` operations.
+
+     - Instructions
+
+   * - :ref:`MFMA <desc-mfma>` instructions
+
+     - The total number of matrix fused multiply-add instructions issued.
+
+     - Instructions
+
+   * - :ref:`SALU <desc-salu>` instructions
+
+     - The total number of scalar arithmetic logic unit (SALU) operations
+       issued. Typically these are used for address calculations, literal
+       constants, and other operations that are *provably* uniform across a
+       wavefront. Although scalar memory (SMEM) operations are issued by the
+       SALU, they are counted separately in this section.
+
+     - Instructions
+
+   * - SMEM instructions
+
+     - The total number of scalar memory (SMEM) operations issued. These are
+       typically used for loading kernel arguments, base-pointers and loads
+       from HIP's ``__constant__`` memory.
+
+     - Instructions
+
+   * - :ref:`Branch <desc-branch>` instructions
+
+     - The total number of branch operations issued. These typically consist of
+       jump or branch operations and are used to implement control flow.
+
+     - Instructions
+
+.. note::
+
+   As mentioned in the :ref:`desc-branch` section, branch operations are not
+   used for execution mask updates, but only for "whole wavefront" control
+   flow changes.
+
+.. _valu-arith-instruction-mix:
+
+VALU arithmetic instruction mix
+-------------------------------
+
+.. warning::
+
+   Not all metrics in this section (for instance, the floating-point instruction
+   breakdowns) are available on CDNA accelerators older than the
+   :ref:`MI2XX <mixxx-note>` series.
+
+This panel details the various types of vector instructions that were
+issued to the :ref:`VALU <desc-valu>`. The metrics in this section do *not*
+include :ref:`MFMA <desc-mfma>` instructions using the same precision; for
+instance, the “F16-ADD” metric does not include any 16-bit floating point
+additions executed as part of an MFMA instruction using the same precision.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 15 65 20
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - INT32
+
+     - The total number of instructions operating on 32-bit integer operands
+       issued to the VALU per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - INT64
+
+     - The total number of instructions operating on 64-bit integer operands
+       issued to the VALU per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F16-ADD
+
+     - The total number of addition instructions operating on 16-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F16-MUL
+
+     - The total number of multiplication instructions operating on 16-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F16-FMA
+
+     - The total number of fused multiply-add instructions operating on 16-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F16-TRANS
+
+     - The total number of transcendental instructions (such as ``sqrt``) operating
+       on 16-bit floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F32-ADD
+
+     - The total number of addition instructions operating on 32-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F32-MUL
+
+     - The total number of multiplication instructions operating on 32-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F32-FMA
+
+     - The total number of fused multiply-add instructions operating on 32-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F32-TRANS
+
+     - The total number of transcendental instructions (such as ``sqrt``)
+       operating on 32-bit floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F64-ADD
+
+     - The total number of addition instructions operating on 64-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F64-MUL
+
+     - The total number of multiplication instructions operating on 64-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F64-FMA
+
+     - The total number of fused multiply-add instructions operating on 64-bit
+       floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - F64-TRANS
+
+     - The total number of transcendental instructions (such as ``sqrt``)
+       operating on 64-bit floating-point operands issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Conversion
+
+     - The total number of type conversion instructions (such as converting data
+       between F32 and F64) issued to the VALU per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+For an example of these counters in action, refer to
+:ref:`valu-arith-instruction-mix-ex`.
+
+.. _vmem-instruction-mix:
+
+VMEM instruction mix
+--------------------
+
+This section breaks down the types of vector memory (VMEM) instructions
+that were issued. Refer to the
+:ref:`Instruction Counts metrics section <ta-instruction-counts>` under address
+processor front end of the vL1D cache for descriptions of these VMEM
+instructions.
+
+.. _mfma-instruction-mix:
+
+MFMA instruction mix
+--------------------
+
+.. warning::
+
+   The metrics in this section are only available on CDNA2
+   (:ref:`MI2XX <mixxx-note>`) accelerators and newer.
+
+This section details the types of Matrix Fused Multiply-Add
+(:ref:`MFMA <desc-mfma>`) instructions that were issued. Note that
+MFMA instructions are classified by the type of input data they operate on, and
+*not* the data type the result is accumulated to.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 25 60 17
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - MFMA-I8 Instructions
+
+     - The total number of 8-bit integer :ref:`MFMA <desc-mfma>` instructions
+       issued per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - MFMA-F16 Instructions
+
+     - The total number of 16-bit floating point :ref:`MFMA <desc-mfma>`
+       instructions issued per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - MFMA-BF16 Instructions
+
+     - The total number of 16-bit brain floating point :ref:`MFMA <desc-mfma>`
+       instructions issued per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - MFMA-F32 Instructions
+
+     - The total number of 32-bit floating-point :ref:`MFMA <desc-mfma>`
+       instructions issued per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - MFMA-F64 Instructions
+
+     - The total number of 64-bit floating-point :ref:`MFMA <desc-mfma>`
+       instructions issued per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+Compute pipeline
+================
+
+.. _metrics-flop-count:
+
+FLOP counting conventions
+-------------------------
+
+Omniperf’s conventions for VALU FLOP counting are as follows:
+
+* Addition or multiplication: 1 operation
+
+* Transcendentals: 1 operation
+
+* Fused multiply-add (FMA): 2 operations
+
+Integer operations (IOPs) do not use this convention. They are counted
+as a single operation regardless of the instruction type.
+
+.. note::
+
+   Packed operations which operate on multiple operands in the same instruction
+   are counted identically to the underlying instruction type. For example, the
+   ``v_pk_add_f32`` instruction on :ref:`MI2XX <mixxx-note>`, which performs an
+   add operation on two pairs of aligned 32-bit floating-point operands is
+   counted only as a single addition -- that is, 1 operation.
+
+As discussed in the :ref:`instruction-mix` section, the FLOP/IOP
+metrics in this section do not take into account the execution mask of
+the operation, and will report the same value even if the execution mask
+is identically zero.
+
+For example, an FMA instruction operating on 32-bit floating-point
+operands (such as ``v_fma_f32`` on a :ref:`MI2XX <mixxx-note>` accelerator)
+would be counted as 128 total FLOPs: 2 operations (due to the
+instruction type) multiplied by 64 operations (because the wavefront is
+composed of 64 work-items).
+
+.. _compute-speed-of-light:
+
+Compute Speed-of-Light
+----------------------
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section is
+   currently computed with the maximum achievable clock frequency, as reported
+   by ``rocminfo``, for an accelerator. This may not be realistic for all
+   workloads.
+
+This section reports the number of floating-point and integer operations
+executed on the :ref:`VALU <desc-valu>` and :ref:`MFMA <desc-mfma>` units in
+various precisions. We note that unlike the
+:ref:`VALU instruction mix <valu-arith-instruction-mix>` and
+:ref:`MFMA instruction mix <mfma-instruction-mix>` sections, the metrics here
+are reported as FLOPs and IOPs, that is, the total number of operations
+executed.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - VALU FLOPs
+
+     - The total floating-point operations executed per second on the
+       :ref:`VALU <desc-valu>`. This is also presented as a percent of the peak
+       theoretical FLOPs achievable on the specific accelerator. Note: this does
+       not include any floating-point operations from :ref:`MFMA <desc-mfma>`
+       instructions.
+
+     - GFLOPs
+
+   * - VALU IOPs
+
+     - The total integer operations executed per second on the
+       :ref:`VALU <desc-valu>`. This is also presented as a percent of the peak
+       theoretical IOPs achievable on the specific accelerator. Note: this does
+       not include any integer operations from :ref:`MFMA <desc-mfma>`
+       instructions.
+
+     - GIOPs
+
+   * - MFMA FLOPs (BF16)
+
+     - The total number of 16-bit brain floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 16-bit
+       brain floating point operations from :ref:`VALU <desc-valu>`
+       instructions. This is also presented as a percent of the peak theoretical
+       BF16 MFMA operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - MFMA FLOPs (F16)
+
+     - The total number of 16-bit floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 16-bit
+       floating point operations from :ref:`VALU <desc-valu>` instructions. This
+       is also presented as a percent of the peak theoretical F16 MFMA
+       operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - MFMA FLOPs (F32)
+
+     - The total number of 32-bit floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 32-bit
+       floating point operations from :ref:`VALU <desc-valu>` instructions. This
+       is also presented as a percent of the peak theoretical F32 MFMA
+       operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - MFMA FLOPs (F64)
+
+     - The total number of 64-bit floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 64-bit
+       floating point operations from :ref:`VALU <desc-valu>` instructions. This
+       is also presented as a percent of the peak theoretical F64 MFMA
+       operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - MFMA IOPs (INT8)
+
+     - The total number of 8-bit integer :ref:`MFMA <desc-mfma>` operations
+       executed per second. Note: this does not include any 8-bit integer
+       operations from :ref:`VALU <desc-valu>` instructions. This is also
+       presented as a percent of the peak theoretical INT8 MFMA operations
+       achievable on the specific accelerator.
+
+     - GIOPs
+
+.. _pipeline-stats:
+
+Pipeline statistics
+-------------------
+
+This section reports a number of key performance characteristics of
+various execution units on the :doc:`CU <compute-unit>`. Refer to
+:ref:`ipc-example` for a detailed dive into these metrics, and to the
+:ref:`scheduler <desc-scheduler>` section for a high-level overview of
+execution units and instruction issue.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 65 15
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - IPC
+
+     - The ratio of the total number of instructions executed on the
+       :doc:`CU <compute-unit>` over the
+       :ref:`total active CU cycles <total-active-cu-cycles>`.
+
+     - Instructions per-cycle
+
+   * - IPC (Issued)
+
+     - The ratio of the total number of
+       (non-:ref:`internal <ipc-internal-instructions>`) instructions issued over
+       the number of cycles where the :ref:`scheduler <desc-scheduler>` was
+       actively working on issuing instructions. Refer to the
+       :ref:`Issued IPC <issued-ipc>` example for further detail.
+
+     - Instructions per-cycle
+
+   * - SALU utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`SALU <desc-salu>` was busy executing instructions. Computed as the
+       ratio of the total number of cycles spent by the
+       :ref:`scheduler <desc-scheduler>` issuing SALU / :ref:`SMEM <desc-smem>`
+       instructions over the :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - VALU utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`VALU <desc-valu>` was busy executing instructions. Does not include
+       :ref:`VMEM <desc-vmem>` operations. Computed as the ratio of the total
+       number of cycles spent by the :ref:`scheduler <desc-scheduler>` issuing
+       VALU instructions over the :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - VMEM utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`VMEM <desc-vmem>` unit was busy executing instructions, including
+       both global/generic and spill/scratch operations (see the
+       :ref:`VMEM instruction count metrics <ta-instruction-counts>` for more
+       detail).  Does not include :ref:`VALU <desc-valu>` operations. Computed
+       as the ratio of the total number of cycles spent by the
+       :ref:`scheduler <desc-scheduler>` issuing VMEM instructions over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - Branch utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`branch <desc-branch>` unit was busy executing instructions.
+       Computed as the ratio of the total number of cycles spent by the
+       :ref:`scheduler <desc-scheduler>` issuing branch instructions over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - VALU active threads
+
+     - Indicates the average level of :ref:`divergence <desc-divergence>` within
+       a wavefront over the lifetime of the kernel. The number of work-items
+       that were active in a wavefront during execution of each
+       :ref:`VALU <desc-valu>` instruction, time-averaged over all VALU
+       instructions run on all wavefronts in the kernel.
+
+     - Work-items
+
+   * - MFMA utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`MFMA <desc-mfma>` unit was busy executing instructions. Computed as
+       the ratio of the total number of cycles the
+       :ref:`MFMA <desc-mfma>` unit was busy over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - MFMA instruction cycles
+
+     - The average duration of :ref:`MFMA <desc-mfma>` instructions in this
+       kernel in cycles. Computed as the ratio of the total number of cycles the
+       MFMA unit was busy over the total number of MFMA instructions. Compare
+       to, for example, the
+       `AMD Matrix Instruction Calculator <https://github.com/RadeonOpenCompute/amd_matrix_instruction_calculator>`_.
+
+     - Cycles per instruction
+
+   * - VMEM latency
+
+     - The average number of round-trip cycles (that is, from issue to data
+       return / acknowledgment) required for a VMEM instruction to complete.
+
+     - Cycles
+
+   * - SMEM latency
+
+     - The average number of round-trip cycles (that is, from issue to data
+       return / acknowledgment) required for a SMEM instruction to complete.
+
+     - Cycles
+
+.. note::
+
+   The branch utilization reported in this section also includes time spent in
+   other instruction types (namely: ``s_endpgm``) that are *typically* a very
+   small percentage of the overall kernel execution. This complication is
+   omitted for simplicity, but may result in small amounts of branch utilization
+   (typically less than 1%) for otherwise branch-less kernels.
+
+.. _arithmetic-operations:
+
+Arithmetic operations
+---------------------
+
+This section reports the total number of floating-point and integer
+operations executed in various precisions. Unlike the
+:ref:`compute-speed-of-light` panel, this section reports both
+:ref:`VALU <desc-valu>` and :ref:`MFMA <desc-mfma>` operations of the same precision
+(e.g., F32) in the same metric. Additionally, this panel lets the user
+control how the data is normalized (i.e., control the
+:ref:`normalization unit <normalization-units>`), while the speed-of-light panel does
+not. For more detail on how operations are counted see the
+:ref:`FLOP counting convention <metrics-flop-count>` section.
+
+.. warning::
+
+   As discussed in :ref:`instruction-mix`, the metrics in this section do not
+   take into account the execution mask of the operation, and will report the
+   same value even if EXEC is identically zero.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 18 65 17
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - FLOPs (Total)
+
+     - The total number of floating-point operations executed on either the
+       :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - FLOP per :ref:`normalization unit <normalization-units>`
+
+   * - IOPs (Total)
+
+     - The total number of integer operations executed on either the
+       :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - IOP per :ref:`normalization unit <normalization-units>`
+
+   * - F16 OPs
+
+     - The total number of 16-bit floating-point operations executed on either the
+       :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - FLOP per :ref:`normalization unit <normalization-units>`
+
+   * - BF16 OPs
+
+     - The total number of 16-bit brain floating-point operations executed on either the
+       :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`. Note: on current CDNA
+       accelerators, the VALU has no native BF16 instructions.
+
+     - FLOP per :ref:`normalization unit <normalization-units>`
+
+   * - F32 OPs
+
+     - The total number of 32-bit floating-point operations executed on either
+       the :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - FLOP per :ref:`normalization unit <normalization-units>`
+
+   * - F64 OPs
+
+     - The total number of 64-bit floating-point operations executed on either
+       the :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - FLOP per :ref:`normalization unit <normalization-units>`
+
+   * - INT8 OPs
+
+     - The total number of 8-bit integer operations executed on either the
+       :ref:`VALU <desc-valu>` or :ref:`MFMA <desc-mfma>` units, per
+       :ref:`normalization unit <normalization-units>`. Note: on current CDNA
+       accelerators, the VALU has no native INT8 instructions.
+
+     - IOP per :ref:`normalization unit <normalization-units>`
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/references.rst b/projects/rocprofiler-compute/docs/conceptual/references.rst
new file mode 100644
index 0000000000..9f3d32cd80
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/references.rst
@@ -0,0 +1,26 @@
+.. meta::
+   :description: Omniperf performance model: References
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, HIP, GCN, LLVM, docs, documentation, training
+
+**********
+References
+**********
+
+Some sections in :doc:`this chapter </conceptual/performance-model>` cite the
+following publicly available documentation.
+
+* :hip-training-pdf:`Introduction to AMD GPU Programming with HIP <>`
+
+* :mi200-isa-pdf:`CDNA2 ISA Reference Guide <>`
+
+* :cdna2-white-paper:`CDNA2 white paper <>`
+
+* :hsa-runtime-pdf:`HSA Runtime Programmer's Reference Manual <>`
+
+* :gcn-crash-course:`The AMD GCN Architecture - A Crash Course (Layla Mah) <>`
+
+* :mantor-gcn-pdf:`AMD Radeon HD7970 with GCN Architecture <>`
+
+* :mantor-vega10-pdf:`AMD Radeon Next Generation GPU Architecture - Vega10 <>`
+
+* :llvm-docs:`LLVM User Guide for AMDGPU Backend <>`
diff --git a/projects/rocprofiler-compute/docs/conceptual/shader-engine.rst b/projects/rocprofiler-compute/docs/conceptual/shader-engine.rst
new file mode 100644
index 0000000000..8295c45160
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/shader-engine.rst
@@ -0,0 +1,707 @@
+.. meta::
+   :description: Omniperf performance model: Shader engine (SE)
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, shader, engine, sL1D, L1I, workgroup manager, SPI
+
+******************
+Shader engine (SE)
+******************
+
+The :doc:`compute units <compute-unit>` on a CDNA™ accelerator are grouped
+together into a higher-level organizational unit called a shader engine (SE):
+
+.. figure:: ../data/performance-model/selayout.png
+   :align: center
+   :alt: Example of CU-grouping into shader engines
+   :width: 800
+
+   Example of CU-grouping into shader engines on AMD Instinct MI-series
+   accelerators.
+
+The number of CUs on an SE varies from chip to chip -- see for example
+:hip-training-pdf:`20`. In addition, newer accelerators such as the AMD
+Instinct™ MI250X have 8 SEs per accelerator.
+
+For the purposes of Omniperf, we consider resources that are shared between
+multiple CUs on a single SE as part of the SE's metrics.
+
+These include:
+
+* The :ref:`scalar L1 data cache <desc-sl1d>`
+
+* The :ref:`L1 instruction cache <desc-l1i>`
+
+* The :ref:`workgroup manager <desc-spi>`
+
+.. _desc-sl1d:
+
+Scalar L1 data cache (sL1D)
+===========================
+
+The Scalar L1 Data cache (sL1D) can cache data accessed from scalar load
+instructions (and scalar store instructions on architectures where they exist)
+from wavefronts in the :doc:`CUs <compute-unit>`. The sL1D is shared between
+multiple CUs (:gcn-crash-course:`36`) -- the exact number of CUs depends on the
+architecture in question (3 CUs in GCN™ GPUs and MI100, 2 CUs in
+:ref:`MI2XX <mixxx-note>`) -- and is backed by the :doc:`L2 cache <l2-cache>`.
+
+In typical usage, the data in the sL1D is comprised of:
+
+* Kernel arguments, such as pointers,
+  `non-populated <https://llvm.org/docs/AMDGPUUsage.html#amdgpu-amdhsa-sgpr-register-set-up-order-table>`_
+  grid and block dimensions, and others
+
+* HIP's ``__constant__`` memory, when accessed in a provably uniform manner
+  [#uniform-access]_
+
+* Other memory, when accessed in a provably uniform manner, *and* the backing
+  memory is provably constant [#uniform-access]_
+
+.. _desc-sl1d-sol:
+
+Scalar L1D Speed-of-Light
+-------------------------
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section is
+   currently computed with the maximum achievable clock frequency, as reported
+   by ``rocminfo``, for an accelerator. This may not be realistic for all
+   workloads.
+
+The Scalar L1D speed-of-light chart shows some key metrics of the sL1D
+cache as a comparison with the peak achievable values of those metrics:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 65 15
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Bandwidth
+
+     - The number of bytes looked up in the sL1D cache, as a percent of the peak
+       theoretical bandwidth. Calculated as the ratio of sL1D requests over the
+       :ref:`total sL1D cycles <total-sl1d-cycles>`.
+
+     - Percent
+
+   * - Cache Hit Rate
+
+     - The percent of sL1D requests that hit [#sl1d-cache]_ on a previously
+       loaded line in the cache. Calculated as the ratio of the number of sL1D
+       requests that hit over the number of all sL1D requests.
+
+     - Percent
+
+   * - sL1D-L2 BW
+
+     - The number of bytes requested by the sL1D from the L2 cache, as a percent
+       of the peak theoretical sL1D → L2 cache bandwidth.  Calculated as the
+       ratio of the total number of requests from the sL1D to the L2 cache over
+       the :ref:`total sL1D-L2 interface cycles <total-sl1d-cycles>`.
+
+     - Percent
+
+.. _desc-sl1d-stats:
+
+Scalar L1D cache accesses
+-------------------------
+
+This panel gives more detail on the types of accesses made to the sL1D,
+and the hit/miss statistics.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Requests
+
+     - The total number of requests, of any size or type, made to the sL1D per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Hits
+
+     - The total number of sL1D requests that hit on a previously loaded cache
+       line, per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Misses - Non Duplicated
+
+     - The total number of sL1D requests that missed on a cache line that *was
+       not* already pending due to another request, per
+       :ref:`normalization unit <normalization-units>`. See :ref:`desc-sl1d-sol`
+       for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Misses - Duplicated
+
+     - The total number of sL1D requests that missed on a cache line that *was*
+       already pending due to another request, per
+       :ref:`normalization unit <normalization-units>`. See
+       :ref:`desc-sl1d-sol` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Cache Hit Rate
+
+     - Indicates the percent of sL1D requests that hit on a previously loaded
+       line in the cache. The ratio of the number of sL1D requests that hit
+       [#sl1d-cache]_ over the number of all sL1D requests.
+
+     - Percent
+
+   * - Read Requests (Total)
+
+     - The total number of sL1D read requests of any size, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Atomic Requests
+
+     - The total number of sL1D atomic requests of any size, per
+       :ref:`normalization unit <normalization-units>`. Typically unused on CDNA
+       accelerators.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Read Requests (1 DWord)
+
+     - The total number of sL1D read requests made for a single dword of data
+       (4B), per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Read Requests (2 DWord)
+
+     - The total number of sL1D read requests made for two dwords of data
+       (8B), per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Read Requests (4 DWord)
+
+     - The total number of sL1D read requests made for four dwords of data
+       (16B), per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Read Requests (8 DWord)
+
+     - The total number of sL1D read requests made for eight dwords of data
+       (32B), per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Read Requests (16 DWord)
+
+     - The total number of sL1D read requests made for sixteen dwords of data
+       (64B), per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+.. _desc-sl1d-l2-interface:
+
+sL1D ↔ L2 Interface
+-------------------
+
+This panel gives more detail on the data requested across the
+sL1D ↔ :doc:`L2 <l2-cache>` interface.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - sL1D-L2 BW
+
+     - The total number of bytes read from, written to, or atomically updated
+       across the sL1D ↔ :doc:`L2 <l2-cache>` interface, per
+       :ref:`normalization unit <normalization-units>`. Note that sL1D writes
+       and atomics are typically unused on current CDNA accelerators, so in the
+       majority of cases this can be interpreted as an sL1D→L2 read bandwidth.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`
+
+   * - Read Requests
+
+     - The total number of read requests from sL1D to the :doc:`L2 <l2-cache>`,
+       per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Write Requests
+
+     - The total number of write requests from sL1D to the :doc:`L2 <l2-cache>`,
+       per :ref:`normalization unit <normalization-units>`. Typically unused on
+       current CDNA accelerators.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Atomic Requests
+
+     - The total number of atomic requests from sL1D to the
+       :doc:`L2 <l2-cache>`, per
+       :ref:`normalization unit <normalization-units>`. Typically unused on
+       current CDNA accelerators.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Stall Cycles
+
+     - The total number of cycles the sL1D↔
+       :doc:`L2 <l2-cache>` interface was stalled, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+.. rubric:: Footnotes
+
+.. [#uniform-access] The scalar data cache is used when the compiler emits
+   scalar loads to access data. This requires that the data be *provably*
+   uniformly accessed (that is, the compiler can verify that all work-items in a
+   wavefront access the same data), *and* that the data can be proven to be
+   read-only (for instance, HIP's ``__constant__`` memory, or properly
+   ``__restrict__``\ed pointers to avoid write-aliasing). Access of
+   ``__constant__`` memory for example is not guaranteed to go through the sL1D
+   if the wavefront loads a non-uniform value.
+
+.. [#sl1d-cache] Unlike the :doc:`vL1D <vector-l1-cache>` and
+   :doc:`L2 <l2-cache>` caches, the sL1D cache on AMD Instinct MI-series CDNA
+   accelerators does *not* use the "hit-on-miss" approach to reporting cache
+   hits. That is, if while satisfying a miss, another request comes in that
+   would hit on the same pending cache line, the subsequent request will be
+   counted as a *duplicated miss*.
+
+.. _desc-l1i:
+
+L1 Instruction Cache (L1I)
+==========================
+
+As with the :ref:`sL1D <desc-sL1D>`, the L1 Instruction (L1I) cache is shared
+between multiple CUs on a shader-engine, where the precise number of CUs
+sharing an L1I depends on the architecture in question (:gcn-crash-course:`36`)
+and is backed by the :doc:`L2 cache <l2-cache>`. Unlike the sL1D, the
+instruction cache is read-only.
+
+.. _desc-l1i-sol:
+
+L1I Speed-of-Light
+------------------
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section is
+   currently computed with the maximum achievable clock frequency, as reported
+   by ``rocminfo``, for an accelerator. This may not be realistic for all
+   workloads.
+
+The L1 Instruction Cache speed-of-light chart shows some key metrics of
+the L1I cache as a comparison with the peak achievable values of those
+metrics:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Bandwidth
+
+     - The number of bytes looked up in the L1I cache, as a percent of the peak
+       theoretical bandwidth. Calculated as the ratio of L1I requests over the
+       :ref:`total L1I cycles <total-l1i-cycles>`.
+
+     - Percent
+
+   * - Cache Hit Rate
+
+     - The percent of L1I requests that hit on a previously loaded line in the
+       cache. Calculated as the ratio of the number of L1I requests that hit
+       [#l1i-cache]_ over the number of all L1I requests.
+
+     - Percent
+
+   * - L1I-L2 BW
+
+     - The percent of the peak theoretical L1I → L2 cache request bandwidth
+       achieved. Calculated as the ratio of the total number of requests from
+       the L1I to the L2 cache over the
+       :ref:`total L1I-L2 interface cycles <total-l1i-cycles>`.
+
+     - Percent
+
+   * - Instruction Fetch Latency
+
+     - The average number of cycles spent to fetch instructions to a
+       :doc:`CU <compute-unit>`.
+
+     - Cycles
+
+.. _desc-l1i-stats:
+
+L1I cache accesses
+------------------
+
+This panel gives more detail on the hit/miss statistics of the L1I:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Requests
+
+     - The total number of requests made to the L1I per
+       :ref:`normalization-unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Hits
+
+     - The total number of L1I requests that hit on a previously loaded cache
+       line, per :ref:`normalization-unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Misses - Non Duplicated
+
+     - The total number of L1I requests that missed on a cache line that
+       *were not* already pending due to another request, per
+       :ref:`normalization-unit <normalization-units>`. See note in
+       :ref:`desc-l1i-sol` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Misses - Duplicated
+
+     - The total number of L1I requests that missed on a cache line that *were*
+       already pending due to another request, per
+       :ref:`normalization-unit <normalization-units>`. See note in
+       :ref:`desc-l1i-sol` for more detail.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Cache Hit Rate
+
+     - The percent of L1I requests that hit [#l1i-cache]_ on a previously loaded
+       line in the cache. Calculated as the ratio of the number of L1I requests
+       that hit over the number of all L1I requests.
+
+     - Percent
+
+L1I - L2 interface
+------------------
+
+This panel gives more detail on the data requested across the
+L1I-:doc:`L2 <l2-cache>` interface.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - L1I-L2 BW
+
+     - The total number of bytes read across the L1I-:doc:`L2 <l2-cache>`
+       interface, per :ref:`normalization unit <normalization-units>`.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`
+
+.. rubric:: Footnotes
+
+.. [#l1i-cache] Unlike the :doc:`vL1D <vector-l1-cache>` and
+   :doc:`L2 <l2-cache>` caches, the L1I cache on AMD Instinct MI-series CDNA
+   accelerators does *not* use the "hit-on-miss" approach to reporting cache
+   hits. That is, if while satisfying a miss, another request comes in that
+   would hit on the same pending cache line, the subsequent request will be
+   counted as a *duplicated miss*.
+
+.. _desc-spi:
+
+Workgroup manager (SPI)
+=======================
+
+The workgroup manager (SPI) is the bridge between the
+:doc:`command processor <command-processor>` and the
+:doc:`compute units <compute-unit>`. After the command processor processes a
+kernel dispatch, it will then pass the dispatch off to the workgroup manager,
+which then schedules :ref:`workgroups <desc-workgroup>` onto the compute units.
+As workgroups complete execution and resources become available, the
+workgroup manager will schedule new workgroups onto compute units. The workgroup
+manager’s metrics therefore are focused on reporting the following:
+
+*  Utilizations of various parts of the accelerator that the workgroup
+   manager interacts with (and the workgroup manager itself)
+
+*  How many workgroups were dispatched, their size, and how many
+   resources they used
+
+*  Percent of scheduler opportunities (cycles) where workgroups failed
+   to dispatch, and
+
+*  Percent of scheduler opportunities (cycles) where workgroups failed
+   to dispatch due to lack of a specific resource on the CUs (for instance, too
+   many VGPRs allocated)
+
+This gives you an idea of why the workgroup manager couldn’t schedule more
+wavefronts onto the device, and is most useful for workloads that you suspect to
+be limited by scheduling or launch rate.
+
+As discussed in :doc:`Command processor <command-processor>`, the command
+processor on AMD Instinct MI-series architectures contains four hardware
+scheduler-pipes, each with eight software threads (:mantor-vega10-pdf:`19`). Each
+scheduler-pipe can issue a kernel dispatch to the workgroup manager to schedule
+concurrently. Therefore, some workgroup manager metrics are presented relative
+to the utilization of these scheduler-pipes (for instance, whether all four are
+issuing concurrently).
+
+.. note::
+
+   Current versions of the profiling libraries underlying Omniperf attempt to
+   serialize concurrent kernels running on the accelerator, as the performance
+   counters on the device are global (that is, shared between concurrent
+   kernels). This means that these scheduler-pipe utilization metrics are
+   expected to reach (for example) a maximum of one pipe active -- only 25%.
+
+Workgroup manager utilizations
+------------------------------
+
+This section describes the utilization of the workgroup manager, and the
+hardware components it interacts with.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 65 15
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Accelerator utilization
+
+     - The percent of cycles in the kernel where the accelerator was actively
+       doing any work.
+
+     - Percent
+
+   * - Scheduler-pipe utilization
+
+     - The percent of :ref:`total scheduler-pipe cycles <total-pipe-cycles>` in
+       the kernel where the scheduler-pipes were actively doing any work. Note:
+       this value is expected to range between 0% and 25%. See :ref:`desc-spi`.
+
+     - Percent
+
+   * - Workgroup manager utilization
+
+     - The percent of cycles in the kernel where the workgroup manager was
+       actively doing any work.
+
+     - Percent
+
+   * - Shader engine utilization
+
+     - The percent of :ref:`total shader engine cycles <total-se-cycles>` in the
+       kernel where any CU in a shader-engine was actively doing any work,
+       normalized over all shader-engines. Low values (e.g., << 100%) indicate
+       that the accelerator was not fully saturated by the kernel, or a
+       potential load-imbalance issue.
+
+     - Percent
+
+   * - SIMD utilization
+
+     - The percent of :ref:`total SIMD cycles <total-simd-cycles>` in the kernel
+       where any :ref:`SIMD <desc-valu>` on a CU was actively doing any work,
+       summed over all CUs. Low values (less than 100%) indicate that the
+       accelerator was not fully saturated by the kernel, or a potential
+       load-imbalance issue.
+
+     - Percent
+
+   * - Dispatched workgroups
+
+     - The total number of workgroups forming this kernel launch.
+
+     - Workgroups
+
+   * - Dispatched wavefronts
+
+     - The total number of wavefronts, summed over all workgroups, forming this
+       kernel launch.
+
+     - Wavefronts
+
+   * - VGPR writes
+
+     - The average number of cycles spent initializing :ref:`VGPRs <desc-valu>`
+       at wave creation.
+
+     - Cycles/wave
+
+   * - SGPR Writes
+
+     - The average number of cycles spent initializing :ref:`SGPRs <desc-salu>`
+       at wave creation.
+
+     - Cycles/wave
+
+Resource allocation
+-------------------
+
+This panel gives more detail on how workgroups and wavefronts were scheduled
+onto compute units, and what occupancy limiters they hit -- if any. When
+analyzing these metrics, you should also take into account their
+achieved occupancy -- such as
+:ref:`wavefront occupancy <wavefront-runtime-stats>`. A kernel may be occupancy
+limited by LDS usage, for example, but may still achieve high occupancy levels
+such that improving occupancy further may not improve performance. See
+:ref:`occupancy-example` for details.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Not-scheduled rate (Workgroup Manager)
+
+     - The percent of :ref:`total scheduler-pipe cycles <total-pipe-cycles>` in
+       the kernel where a workgroup could not be scheduled to a
+       :doc:`CU <compute-unit>` due to a bottleneck within the workgroup manager
+       rather than a lack of a CU or :ref:`SIMD <desc-valu>` with sufficient
+       resources. Note: this value is expected to range between 0-25%. See note
+       in :ref:`workgroup manager <desc-spi>` description.
+
+     - Percent
+
+   * - Not-scheduled rate (Scheduler-Pipe)
+
+     - The percent of :ref:`total scheduler-pipe cycles <total-pipe-cycles>` in
+       the kernel where a workgroup could not be scheduled to a
+       :doc:`CU <compute-unit>` due to a bottleneck within the scheduler-pipes
+       rather than a lack of a CU or :ref:`SIMD <desc-valu>` with sufficient
+       resources. Note: this value is expected to range between 0-25%, see note
+       in :ref:`workgroup manager <desc-spi>` description.
+
+     - Percent
+
+   * - Scheduler-Pipe Stall Rate
+
+     - The percent of :ref:`total scheduler-pipe cycles <total-pipe-cycles>` in
+       the kernel where a workgroup could not be scheduled to a
+       :doc:`CU <compute-unit>` due to occupancy limitations (like a lack of a
+       CU or :ref:`SIMD <desc-valu>` with sufficient resources). Note: this
+       value is expected to range between 0-25%, see note in
+       :ref:`workgroup manager <desc-spi>` description.
+
+     - Percent
+
+   * - Scratch Stall Rate
+
+     - The percent of :ref:`total shader-engine cycles <total-se-cycles>` in the
+       kernel where a workgroup could not be scheduled to a
+       :doc:`CU <compute-unit>` due to lack of
+       :ref:`private (a.k.a., scratch) memory <memory-type>` slots. While this
+       can reach up to 100%, note that the actual occupancy limitations on a
+       kernel using private memory are typically quite small (for example, less
+       than 1% of the total number of waves that can be scheduled to an
+       accelerator).
+
+     - Percent
+
+   * - Insufficient SIMD Waveslots
+
+     - The percent of :ref:`total SIMD cycles <total-simd-cycles>` in the kernel
+       where a workgroup could not be scheduled to a  :ref:`SIMD <desc-valu>`
+       due to lack of available :ref:`waveslots <desc-valu>`.
+
+     - Percent
+
+   * - Insufficient SIMD VGPRs
+
+     - The percent of :ref:`total SIMD cycles <total-simd-cycles>` in the kernel
+       where a workgroup could not be scheduled to a  :ref:`SIMD <desc-valu>`
+       due to lack of available :ref:`VGPRs <desc-valu>`.
+
+     - Percent
+
+   * - Insufficient SIMD SGPRs
+
+     - The percent of :ref:`total SIMD cycles <total-simd-cycles>` in the kernel
+       where a workgroup could not be scheduled to a :ref:`SIMD <desc-valu>`
+       due to lack of available :ref:`SGPRs <desc-salu>`.
+
+     - Percent
+
+   * - Insufficient CU LDS
+
+     - The percent of :ref:`total CU cycles <total-cu-cycles>` in the kernel
+       where a workgroup could not be scheduled to a :doc:`CU <compute-unit>`
+       due to lack of available :doc:`LDS <local-data-share>`.
+
+     - Percent
+
+   * - Insufficient CU Barriers
+
+     - The percent of :ref:`total CU cycles <total-cu-cycles>` in the kernel
+       where a workgroup could not be scheduled to a :doc:`CU <compute-unit>`
+       due to lack of available :ref:`barriers <desc-barrier>`.
+
+     - Percent
+
+   * - Reached CU Workgroup Limit
+
+     - The percent of :ref:`total CU cycles <total-cu-cycles>` in the kernel
+       where a workgroup could not be scheduled to a :doc:`CU <compute-unit>`
+       due to limits within the workgroup manager. This is expected to
+       always be zero on CDNA2 or newer accelerators (and small for previous
+       accelerators).
+
+     - Percent
+
+   * - Reached CU Wavefront Limit
+
+     - The percent of :ref:`total CU cycles <total-cu-cycles>` in the kernel
+       where a wavefront could not be scheduled to a :doc:`CU <compute-unit>`
+       due to limits within the workgroup manager. This is expected to
+       always be zero on CDNA2 or newer accelerators (and small for previous
+       accelerators).
+
+     - Percent
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/system-speed-of-light.rst b/projects/rocprofiler-compute/docs/conceptual/system-speed-of-light.rst
new file mode 100644
index 0000000000..f01be4b67b
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/system-speed-of-light.rst
@@ -0,0 +1,318 @@
+.. meta::
+   :description: Omniperf performance model: System Speed-of-Light
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD, system, speed of light
+
+*********************
+System Speed-of-Light
+*********************
+
+System Speed-of-Light summarizes some of the key metrics from various sections
+of Omniperf’s profiling report.
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section is
+   currently computed with the maximum achievable clock frequency, as reported
+   by ``rocminfo``, for an accelerator. This may not be realistic for
+   all workloads.
+
+   Also, not all metrics -- such as FLOP counters -- are available on all AMD
+   Instinct™ MI-series accelerators. For more detail on how operations are
+   counted, see the :ref:`metrics-flop-count` section.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - :ref:`VALU <desc-valu>` FLOPs
+
+     - The total floating-point operations executed per second on the
+       :ref:`VALU <desc-valu>`.  This is also presented as a percent of the peak
+       theoretical FLOPs achievable on the specific accelerator. Note: this does
+       not include any floating-point operations from :ref:`MFMA <desc-mfma>`
+       instructions.
+
+     - GFLOPs
+
+   * - :ref:`VALU <desc-valu>` IOPs
+
+     - The total integer operations executed per second on the
+       :ref:`VALU <desc-valu>`. This is also presented as a percent of the peak
+       theoretical IOPs achievable on the specific accelerator. Note: this does
+       not include any integer operations from :ref:`MFMA <desc-mfma>`
+       instructions.
+
+     - GIOPs
+
+   * - :ref:`MFMA <desc-mfma>` FLOPs (BF16)
+
+     - The total number of 16-bit brain floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 16-bit
+       brain floating point operations from :ref:`VALU <desc-valu>`
+       instructions. This is also presented as a percent of the peak theoretical
+       BF16 MFMA operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - :ref:`MFMA <desc-mfma>` FLOPs (F16)
+
+     - The total number of 16-bit floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 16-bit
+       floating point operations from :ref:`VALU <desc-valu>` instructions. This
+       is also presented as a percent of the peak theoretical F16 MFMA
+       operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - :ref:`MFMA <desc-mfma>` FLOPs (F32)
+
+     - The total number of 32-bit floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 32-bit
+       floating point operations from :ref:`VALU <desc-valu>` instructions. This
+       is also presented as a percent of the peak theoretical F32 MFMA
+       operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - :ref:`MFMA <desc-mfma>` FLOPs (F64)
+
+     - The total number of 64-bit floating point :ref:`MFMA <desc-mfma>`
+       operations executed per second. Note: this does not include any 64-bit
+       floating point operations from :ref:`VALU <desc-valu>` instructions. This
+       is also presented as a percent of the peak theoretical F64 MFMA
+       operations achievable on the specific accelerator.
+
+     - GFLOPs
+
+   * - :ref:`MFMA <desc-mfma>` IOPs (INT8)
+
+     - The total number of 8-bit integer :ref:`MFMA <desc-mfma>` operations
+       executed per second. Note: this does not include any 8-bit integer
+       operations from :ref:`VALU <desc-valu>` instructions. This is also
+       presented as a percent of the peak theoretical INT8 MFMA operations
+       achievable on the specific accelerator.
+
+     - GIOPs
+
+   * - :ref:`SALU <desc-salu>` utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`SALU <desc-salu>` was busy executing instructions. Computed as the
+       ratio of the total number of cycles spent by the
+       :ref:`scheduler <desc-scheduler>` issuing :ref:`SALU <desc-salu>` or
+       :ref:`SMEM <desc-salu>` instructions over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - :ref:`VALU <desc-valu>` utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`VALU <desc-valu>` was busy executing instructions. Does not include
+       :ref:`VMEM <desc-vmem>` operations.  Computed as the ratio of the total
+       number of cycles spent by the :ref:`scheduler <desc-scheduler>` issuing
+       :ref:`VALU <desc-valu>` instructions over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - :ref:`MFMA <desc-mfma>` utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`MFMA <desc-mfma>` unit was busy executing instructions. Computed as
+       the ratio of the total number of cycles the MFMA was busy over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - :ref:`VMEM <desc-vmem>` utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`VMEM <desc-vmem>` unit was busy executing instructions, including
+       both global/generic and spill/scratch operations (see the
+       :ref:`VMEM instruction count metrics <ta-instruction-counts>` for more
+       detail). Does not include :ref:`VALU <desc-valu>` operations. Computed as
+       the ratio of the total number of cycles spent by the
+       :ref:`scheduler <desc-scheduler>` issuing VMEM instructions over the
+       :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - :ref:`Branch <desc-branch>` utilization
+
+     - Indicates what percent of the kernel's duration the
+       :ref:`branch <desc-branch>` unit was busy executing instructions.
+       Computed as the ratio of the total number of cycles spent by the
+       :ref:`scheduler <desc-scheduler>` issuing :ref:`branch <desc-branch>`
+       instructions over the :ref:`total CU cycles <total-cu-cycles>`.
+
+     - Percent
+
+   * - :ref:`VALU <desc-valu>` active threads
+
+     - Indicates the average level of :ref:`divergence <desc-divergence>` within
+       a wavefront over the lifetime of the kernel. The number of work-items
+       that were active in a wavefront during execution of each
+       :ref:`VALU <desc-valu>` instruction, time-averaged over all VALU
+       instructions run on all wavefronts in the kernel.
+
+     - Work-items
+
+   * - IPC
+
+     - The ratio of the total number of instructions executed on the
+       :doc:`CU <compute-unit>` over the
+       :ref:`total active CU cycles <total-active-cu-cycles>`. This is also
+       presented as a percent of the peak theoretical bandwidth achievable on
+       the specific accelerator.
+
+     - Instructions per-cycle
+
+   * - Wavefront occupancy
+
+     - The time-averaged number of wavefronts resident on the accelerator over
+       the lifetime of the kernel. Note: this metric may be inaccurate for
+       short-running kernels (less than 1ms). This is also presented as a
+       percent of the peak theoretical occupancy achievable on the specific
+       accelerator.
+
+     - Wavefronts
+
+   * - :doc:`LDS <local-data-share>` theoretical bandwidth
+
+     - Indicates the maximum amount of bytes that could have been loaded from,
+       stored to, or atomically updated in the LDS per unit time (see
+       :ref:`LDS Bandwidth <lds-bandwidth>` example for more detail). This is
+       also presented as a percent of the peak theoretical bandwidth achievable
+       on the specific accelerator.
+
+     - GB/s
+
+   * - :doc:`LDS <local-data-share>` bank conflicts/access
+
+     - The ratio of the number of cycles spent in the
+       :doc:`LDS scheduler <local-data-share>` due to bank conflicts (as
+       determined by the conflict resolution hardware) to the base number of
+       cycles that would be spent in the LDS scheduler in a completely
+       uncontended case. This is also presented in normalized form (i.e., the
+       Bank Conflict Rate).
+
+     - Conflicts/Access
+
+   * - :doc:`vL1D <vector-l1-cache>` cache hit rate
+
+     - The ratio of the number of vL1D cache line requests that hit in vL1D
+       cache over the total number of cache line requests to the
+       :ref:`vL1D cache RAM <desc-tc>`.
+
+     - Percent
+
+   * - :doc:`vL1D <vector-l1-cache>` cache bandwidth
+
+     - The number of bytes looked up in the vL1D cache as a result of
+       :ref:`VMEM <desc-vmem>` instructions per unit time. The number of bytes
+       is calculated as the number of cache lines requested multiplied by the
+       cache line size. This value does not consider partial requests, so e.g.,
+       if only a single value is requested in a cache line, the data movement
+       will still be counted as a full cache line. This is also presented as a
+       percent of the peak theoretical bandwidth achievable on the specific
+       accelerator.
+
+     - GB/s
+
+   * - :doc:`L2 <l2-cache>` cache hit rate
+
+     - The ratio of the number of L2 cache line requests that hit in the L2
+       cache over the total number of incoming cache line requests to the L2
+       cache.
+
+     - Percent
+
+   * - :doc:`L2 <l2-cache>` cache bandwidth
+
+     - The number of bytes looked up in the L2 cache per unit time.  The number
+       of bytes is calculated as the number of cache lines requested multiplied
+       by the cache line size. This value does not consider partial requests, so
+       e.g., if only a single value is requested in a cache line, the data
+       movement will still be counted as a full cache line. This is also
+       presented as a percent of the peak theoretical bandwidth achievable on
+       the specific accelerator.
+
+     - GB/s
+
+   * - :doc:`L2 <l2-cache>`-fabric read BW
+
+     - The number of bytes read by the L2 over the
+       :ref:`Infinity Fabric™ interface <l2-fabric>` per unit time. This is also
+       presented as a percent of the peak theoretical bandwidth achievable on
+       the specific accelerator.
+
+     - GB/s
+
+   * - :doc:`L2 <l2-cache>`-fabric write and atomic BW
+
+     - The number of bytes sent by the L2 over the
+       :ref:`Infinity Fabric interface <l2-fabric>` by write and atomic
+       operations per unit time. This is also presented as a percent of the peak
+       theoretical bandwidth achievable on the specific accelerator.
+
+     - GB/s
+
+   * - :doc:`L2 <l2-cache>`-fabric read latency
+
+     - The time-averaged number of cycles read requests spent in Infinity Fabric
+       before data was returned to the L2.
+
+     - Cycles
+
+   * - :doc:`L2 <l2-cache>`-fabric write latency
+
+     - The time-averaged number of cycles write requests spent in Infinity
+       Fabric before a completion acknowledgement was returned to the L2.
+
+     - Cycles
+
+   * - :ref:`sL1D <desc-sl1d>` cache hit rate
+
+     - The percent of sL1D requests that hit on a previously loaded line in the
+       cache. Calculated as the ratio of the number of sL1D requests that hit
+       over the number of all sL1D requests.
+
+     - Percent
+
+   * - :ref:`sL1D <desc-sl1d>` bandwidth
+
+     - The number of bytes looked up in the sL1D cache per unit time. This is
+       also presented as a percent of the peak theoretical bandwidth achievable
+       on the specific accelerator.
+
+     - GB/s
+
+   * - :ref:`L1I <desc-l1i>` bandwidth
+
+     - The number of bytes looked up in the L1I cache per unit time. This is
+       also presented as a percent of the peak theoretical bandwidth achievable
+       on the specific accelerator.
+
+     - GB/s
+
+   * - :ref:`L1I <desc-l1i>` cache hit rate
+
+     - The percent of L1I requests that hit on a previously loaded line in the
+       cache. Calculated as the ratio of the number of L1I requests that hit
+       over the number of all L1I requests.
+
+     - Percent
+
+   * - :ref:`L1I <desc-l1i>` fetch latency
+
+     - The average number of cycles spent to fetch instructions to a
+       :doc:`CU <compute-unit>`.
+
+     - Cycles
+
diff --git a/projects/rocprofiler-compute/docs/conceptual/vector-l1-cache.rst b/projects/rocprofiler-compute/docs/conceptual/vector-l1-cache.rst
new file mode 100644
index 0000000000..086c195be5
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conceptual/vector-l1-cache.rst
@@ -0,0 +1,767 @@
+.. meta::
+   :description: Omniperf performance model: Vector L1 cache (vL1D)
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD, vector, l1, cache, vl1d
+
+**********************
+Vector L1 cache (vL1D)
+**********************
+
+The vector L1 data (vL1D) cache is local to each
+:doc:`compute unit <compute-unit>` on the accelerator, and handles vector memory
+operations issued by a wavefront. The vL1D cache consists of several components:
+
+* An address processing unit, also known as the
+  :ref:`texture addresser <desc-ta>` which receives commands (instructions) and
+  write/atomic data from the :doc:`compute unit <compute-unit>`, and coalesces
+  them into fewer requests for the cache to process.
+
+* An address translation unit, also known as the
+  :ref:`L1 Unified Translation Cache (UTCL1) <desc-utcl1>`, that translates
+  requests from virtual to physical addresses for lookup in the cache. The
+  translation unit has an L1 translation lookaside buffer (L1TLB) to reduce the
+  cost of repeated translations.
+
+* A Tag RAM that looks up whether a requested cache line is already
+  present in the :ref:`cache <desc-tc>`.
+
+* The result of the Tag RAM lookup is placed in the L1 cache controller
+  for routing to the correct location; for instance, the
+  :ref:`L2 Memory Interface <vl1d-l2-transaction-detail>` for misses or the
+  :ref:`cache RAM <desc-tc>` for hits.
+
+* The cache RAM, also known as the :ref:`texture cache (TC) <desc-tc>`, stores
+  requested data for potential reuse. Data returned from the
+  :doc:`L2 cache <l2-cache>` is placed into the cache RAM before going down the
+  :ref:`data-return path <desc-td>`.
+
+* A backend data processing unit, also known as the
+  :ref:`texture data (TD) <desc-td>` that routes data back to the requesting
+  :doc:`compute unit <compute-unit>`.
+
+Together, this complex is known as the vL1D, or Texture Cache per Pipe
+(TCP). A simplified diagram of the vL1D is presented below:
+
+.. figure:: ../data/performance-model/l1perf_model.png
+   :align: center
+   :alt: Performance model of the vL1D Cache on AMD Instinct
+   :width: 800
+
+   Performance model of the vL1D Cache on AMD Instinct MI-series accelerators.
+
+.. _vl1d-sol:
+
+vL1D Speed-of-Light
+===================
+
+.. warning::
+
+   The theoretical maximum throughput for some metrics in this section is
+   currently computed with the maximum achievable clock frequency, as reported
+   by ``rocminfo``, for an accelerator. This may not be realistic for all
+   workloads.
+
+The vL1D’s speed-of-light chart shows several key metrics for the vL1D
+as a comparison with the peak achievable values of those metrics.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Hit Rate
+
+     - The ratio of the number of vL1D cache line requests that hit [#vl1d-hit]_
+       in vL1D cache over the total number of cache line requests to the
+       :ref:`vL1D Cache RAM <desc-tc>`.
+
+     - Percent
+
+   * - Bandwidth
+
+     - The number of bytes looked up in the vL1D cache as a result of
+       :ref:`VMEM <desc-vmem>` instructions, as a percent of the peak
+       theoretical bandwidth achievable on the specific accelerator. The number
+       of bytes is calculated as the number of cache lines requested multiplied
+       by the cache line size. This value does not consider partial requests, so
+       for instance, if only a single value is requested in a cache line, the
+       data movement will still be counted as a full cache line.
+
+     - Percent
+
+   * - Utilization
+
+     - Indicates how busy the :ref:`vL1D Cache RAM <desc-tc>` was during the
+       kernel execution. The number of cycles where the vL1D Cache RAM is
+       actively processing any request divided by the number of cycles where the
+       vL1D is active [#vl1d-activity]_.
+
+     - Percent
+
+   * - Coalescing
+
+     - Indicates how well memory instructions were coalesced by the
+       :ref:`address processing unit <desc-ta>`, ranging from uncoalesced (25%)
+       to fully coalesced (100%). Calculated as the average number of
+       :ref:`thread-requests <thread-requests>` generated per instruction
+       divided by the ideal number of thread-requests per instruction.
+
+     - Percent
+
+.. _desc-ta:
+
+Address processing unit or Texture Addresser (TA)
+=================================================
+
+The :doc:`vL1D <vector-l1-cache>`’s address processing unit receives vector
+memory instructions (commands) along with write/atomic data from a
+:doc:`compute unit <compute-unit>` and is responsible for coalescing these into
+requests for lookup in the :ref:`vL1D RAM <desc-tc>`. The address processor
+passes information about the commands (coalescing state, destination SIMD,
+etc.) to the :ref:`data processing unit <desc-td>` for use after the requested
+data has been retrieved.
+
+Omniperf reports several metrics to indicate performance bottlenecks in
+the address processing unit, which are broken down into a few
+categories:
+
+-  :ref:`ta-busy-stall`
+
+-  :ref:`ta-instruction-counts`
+
+-  :ref:`ta-spill-stack`
+
+.. _ta-busy-stall:
+
+Busy / stall metrics
+--------------------
+
+When executing vector memory instructions, the compute unit must send an
+address (and in the case of writes/atomics, data) to the address
+processing unit. When the front-end cannot accept any more addresses, it
+must backpressure the wave-issue logic for the VMEM pipe and prevent the
+issue of further vector memory instructions.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Busy
+
+     - Percent of the :ref:`total CU cycles <total-cu-cycles>` the address
+       processor was busy
+
+     - Percent
+
+   * - Address Stall
+
+     - Percent of the :ref:`total CU cycles <total-cu-cycles>` the address
+       processor was stalled from sending address requests further into the vL1D
+       pipeline
+
+     - Percent
+
+   * - Data Stall
+
+     - Percent of the :ref:`total CU cycles <total-cu-cycles>` the address
+       processor was stalled from sending write/atomic data further into the
+       vL1D pipeline
+
+     - Percent
+
+   * - Data-Processor → Address Stall
+
+     - Percent of :ref:`total CU cycles <total-cu-cycles>` the address processor
+       was stalled waiting to send command data to the
+       :ref:`data processor <desc-td>`
+
+     - Percent
+
+.. _ta-instruction-counts:
+
+Instruction counts
+------------------
+
+The address processor also counts instruction types to give the user
+information on what sorts of memory instructions were executed by the
+kernel. These are broken down into a few major categories:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Memory type
+
+     - Usage
+
+     - Description
+
+   * - Global
+
+     - Global memory
+
+     - Global memory can be seen by all threads from a process. This includes
+       the local accelerator's DRAM, remote accelerator's DRAM, and the host's
+       DRAM.
+
+   * - Generic
+
+     - Dynamic address spaces
+
+     - Generic memory, or "flat" memory, is used when the compiler cannot
+       statically prove that a pointer is to memory in one or the other address
+       spaces. The pointer could dynamically point into global, local, constant,
+       or private memory.
+
+   * - Private Memory
+
+     - Register spills / Stack memory
+
+     - Private memory, or "scratch" memory, is only visible to a particular
+       :ref:`work-item <desc-work-item>` in a particular
+       :ref:`workgroup <desc-workgroup>`. On AMD Instinct™ MI-series
+       accelerators, private memory is used to implement both register spills
+       and stack memory accesses.
+
+The address processor counts these instruction types as follows:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Type
+
+     - Description
+
+     - Unit
+
+   * - Global/Generic
+
+     - The total number of global & generic memory instructions executed on all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Global/Generic Read
+
+     - The total number of global & generic memory read instructions executed on
+       all :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Global/Generic Write
+
+     - The total number of global & generic memory write instructions executed
+       on all :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Global/Generic Atomic
+
+     - The total number of global & generic memory atomic (with and without
+       return) instructions executed on all :doc:`compute units <compute-unit>`
+       on the accelerator, per :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Spill/Stack
+
+     - The total number of spill/stack memory instructions executed on all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Spill/Stack Read
+
+     - The total number of spill/stack memory read instructions executed on all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Spill/Stack Write
+
+     - The total number of spill/stack memory write instructions executed on all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Spill/Stack Atomic
+
+     - The total number of spill/stack memory atomic (with and without return)
+       instructions executed on all :doc:`compute units <compute-unit>` on the
+       accelerator, per :ref:`normalization unit <normalization-units>`.
+       Typically unused, as these memory operations are generally used to
+       implement thread-local storage.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+.. note::
+
+   The above is a simplified model specifically for the HIP programming language
+   that does not consider inline assembly usage, constant memory usage or
+   texture memory.
+
+   These categories correspond to:
+
+   * Global/Generic: global and flat memory operations, that are used for global
+     and generic memory access.
+
+   * Spill/Stack: buffer instructions which are used on the MI50, MI100, and
+     :ref:`MI2XX <mixxx-note>` accelerators for register spills / stack memory.
+
+   These concepts are described in more detail in the :ref:`memory-spaces`
+   section, while generic memory access is explored in the
+   :ref:`generic memory benchmark <flat-memory-ex>` section.
+
+.. _ta-spill-stack:
+
+Spill / stack metrics
+---------------------
+
+Finally, the address processing unit contains a separate coalescing
+stage for spill/stack memory, and thus reports:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Spill/Stack Total Cycles
+
+     - The number of cycles the address processing unit spent working on
+       spill/stack instructions, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Spill/Stack Coalesced Read Cycles
+
+     - The number of cycles the address processing unit spent working on
+       coalesced spill/stack read instructions, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+   * - Spill/Stack Coalesced Write Cycles
+
+     - The number of cycles the address processing unit spent working on
+       coalesced spill/stack write instructions, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cycles per :ref:`normalization unit <normalization-units>`
+
+.. _desc-utcl1:
+
+L1 Unified Translation Cache (UTCL1)
+====================================
+
+After a vector memory instruction has been processed/coalesced by the
+address processing unit of the vL1D, it must be translated from a
+virtual to physical address. This process is handled by the L1 Unified
+Translation Cache (UTCL1). This cache contains an L1 Translation
+Lookaside Buffer (TLB) which stores recently translated addresses to
+reduce the cost of subsequent re-translations.
+
+Omniperf reports the following L1 TLB metrics:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Requests
+
+     - The number of translation requests made to the UTCL1 per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Hits
+
+     - The number of translation requests that hit in the UTCL1, and could be
+       reused, per :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Hit Ratio
+
+     - The ratio of the number of translation requests that hit in the UTCL1
+       divided by the total number of translation requests made to the UTCL1.
+
+     - Percent
+
+   * - Translation Misses
+
+     - The total number of translation requests that missed in the UTCL1 due to
+       translation not being present in the cache, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Permission Misses
+
+     - The total number of translation requests that missed in the UTCL1 due to
+       a permission error, per :ref:`normalization unit <normalization-units>`.
+       This is unused and expected to be zero in most configurations for modern
+       CDNA™ accelerators.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+.. note::
+
+   On current CDNA accelerators, such as the :ref:`MI2XX <mixxx-note>`, the
+   UTCL1 does *not* count hit-on-miss requests.
+
+.. _desc-tc:
+
+Vector L1 Cache RAM or Texture Cache (TC)
+=========================================
+
+After coalescing in the :ref:`address processing unit <desc-ta>` of the vL1D,
+and address translation in the :ref:`L1 TLB <desc-utcl1>`, the request proceeds
+to the Cache RAM stage of the pipeline. Incoming requests are looked up
+in the cache RAMs using parts of the physical address as a tag. Hits
+will be returned through the :ref:`data-return path <desc-td>`, while misses
+will be routed out to the :doc:`L2 Cache <l2-cache>` for servicing.
+
+The metrics tracked by the vL1D RAM include:
+
+-  :ref:`Stall metrics <vl1d-cache-stall-metrics>`
+
+-  :ref:`Cache access metrics <vl1d-cache-access-metrics>`
+
+-  :ref:`vL1D-L2 transaction detail metrics <vl1d-l2-transaction-detail>`
+
+.. _vl1d-cache-stall-metrics:
+
+vL1D cache stall metrics
+------------------------
+
+The vL1D also reports where it is stalled in the pipeline, which may
+indicate performance limiters of the cache. A stall in the pipeline may
+result in backpressuring earlier parts of the pipeline, e.g., a stall on
+L2 requests may backpressure the wave-issue logic of the :ref:`VMEM <desc-vmem>`
+pipe and prevent it from issuing more vector memory instructions until
+the vL1D’s outstanding requests are completed.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Stalled on L2 Data
+
+     - The ratio of the number of cycles where the vL1D is stalled waiting for
+       requested data to return from the :doc:`L2 cache <l2-cache>` divided by
+       the number of cycles where the vL1D is active [#vl1d-activity]_.
+
+     - Percent
+
+   * - Stalled on L2 Requests
+
+     - The ratio of the number of cycles where the vL1D is stalled waiting to
+       issue a request for data to the :doc:`L2 cache <l2-cache>` divided by the
+       number of cycles where the vL1D is active [#vl1d-activity]_.
+
+     - Percent
+
+   * - Tag RAM Stall (Read/Write/Atomic)
+
+     - The ratio of the number of cycles where the vL1D is stalled due to
+       Read/Write/Atomic requests with conflicting tags being looked up
+       concurrently, divided by the number of cycles where the
+       vL1D is active [#vl1d-activity]_.
+
+     - Percent
+
+.. _vl1d-cache-access-metrics:
+
+vL1D cache access metrics
+-------------------------
+
+The vL1D cache access metrics broadly indicate the type of requests
+incoming from the :ref:`cache front-end <desc-ta>`, the number of requests that
+were serviced by the vL1D, and the number & type of outgoing requests to
+the :doc:`L2 cache <l2-cache>`. In addition, this section includes the
+approximate latencies of accesses to the cache itself, along with
+latencies of read/write memory operations to the :doc:`L2 cache <l2-cache>`.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Total Requests
+
+     - The total number of incoming requests from the
+       :ref:`address processing unit <desc-ta>` after coalescing.
+
+     - Requests
+
+   * - Total read/write/atomic requests
+
+     - The total number of incoming read/write/atomic requests from the
+       :ref:`address processing unit <desc-ta>` after coalescing per
+       :ref:`normalization unit <normalization-units>`
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - Cache Bandwidth
+
+     - The number of bytes looked up in the vL1D cache as a result of
+       :ref:`VMEM <desc-vmem>` instructions per
+       :ref:`normalization unit <normalization-units>`.  The number of bytes is
+       calculated as the number of cache lines requested multiplied by the cache
+       line size.  This value does not consider partial requests, so for
+       instance, if only a single value is requested in a cache line, the data
+       movement will still be counted as a full cache line.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`
+
+   * - Cache Hit Rate [#vl1d-hit]_
+
+     - The ratio of the number of vL1D cache line requests that hit in vL1D
+       cache over the total number of cache line requests to the
+       :ref:`vL1D Cache RAM <desc-tc>`.
+
+     - Percent
+
+   * - Cache Accesses
+
+     - The total number of cache line lookups in the vL1D.
+
+     - Cache lines
+
+   * - Cache Hits [#vl1d-hit]_
+
+     - The number of cache accesses minus the number of outgoing requests to the
+       :doc:`L2 cache <l2-cache>`, that is, the number of cache line requests
+       serviced by the :ref:`vL1D Cache RAM <desc-tc>` per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Cache lines per :ref:`normalization unit <normalization-units>`
+
+   * - Invalidations
+
+     - The number of times the vL1D was issued a write-back invalidate command
+       during the kernel's execution per
+       :ref:`normalization unit <normalization-units>`.  This may be triggered
+       by, for instance, the ``buffer_wbinvl1`` instruction.
+
+     - Invalidations per :ref:`normalization unit <normalization-units>`
+
+   * - L1-L2 Bandwidth
+
+     - The number of bytes transferred across the vL1D-L2 interface as a result
+       of :ref:`VMEM <desc-vmem>` instructions, per
+       :ref:`normalization unit <normalization-units>`. The number of bytes is
+       calculated as the number of cache lines requested multiplied by the cache
+       line size. This value does not consider partial requests, so for
+       instance, if only a single value is requested in a cache line, the data
+       movement will still be counted as a full cache line.
+
+     - Bytes per :ref:`normalization unit <normalization-units>`
+
+   * - L1-L2 Reads
+
+     - The number of read requests for a vL1D cache line that were not satisfied
+       by the vL1D and must be retrieved from the
+       :doc:`L2 Cache <l2-cache>` per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - L1-L2 Writes
+
+     - The number of write requests to a vL1D cache line that were sent through
+       the vL1D to the :doc:`L2 cache <l2-cache>`, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - L1-L2 Atomics
+
+     - The number of atomic requests that are sent through the vL1D to the
+       :doc:`L2 cache <l2-cache>`, per
+       :ref:`normalization unit <normalization-units>`. This includes requests
+       for atomics with and without return.
+
+     - Requests per :ref:`normalization unit <normalization-units>`
+
+   * - L1 Access Latency
+
+     - Calculated as the average number of cycles that a vL1D cache line request
+       spent in the vL1D cache pipeline.
+
+     - Cycles
+
+   * - L1-L2 Read Access Latency
+
+     - Calculated as the average number of cycles that the vL1D cache took to
+       issue and receive read requests from the :doc:`L2 Cache <l2-cache>`. This
+       number also includes requests for atomics with return values.
+
+     - Cycles
+
+   * - L1-L2 Write Access Latency
+
+     - Calculated as the average number of cycles that the vL1D cache took to
+       issue and receive acknowledgement of a write request to the
+       :doc:`L2 Cache <l2-cache>`. This number also includes requests for
+       atomics without return values.
+
+     - Cycles
+
+.. note::
+
+   All cache accesses in vL1D are for a single cache line's worth of data.
+   The size of a cache line may vary; however, on current AMD Instinct MI CDNA
+   accelerators and GCN™ GPUs the L1 cache line size is 64B.
+
+.. rubric :: Footnotes
+
+.. [#vl1d-hit] The vL1D cache on AMD Instinct MI-series CDNA accelerators
+   uses a "hit-on-miss" approach to reporting cache hits. That is, if while
+   satisfying a miss, another request comes in that would hit on the same
+   pending cache line, the subsequent request will be counted as a "hit".
+   Therefore, it is also important to consider the access latency metric in the
+   :ref:`Cache access metrics <vl1d-cache-access-metrics>` section when
+   evaluating the vL1D hit rate.
+
+.. [#vl1d-activity] Omniperf considers the vL1D to be active when any part of
+   the vL1D (excluding the :ref:`address processor <desc-ta>` and
+   :ref:`data return <desc-td>` units) is active, for example, when performing
+   a translation, waiting for data, accessing the Tag or Cache RAMs, etc.
+
+.. _vl1d-l2-transaction-detail:
+
+vL1D - L2 Transaction Detail
+----------------------------
+
+This section provides a more granular look at the types of requests made
+to the :doc:`L2 cache <l2-cache>`. These are broken down by the operation type
+(read / write / atomic, with or without return), and the
+:ref:`memory type <memory-type>`.
+
+.. _desc-td:
+
+Vector L1 data-return path or Texture Data (TD)
+===============================================
+
+The data-return path of the vL1D cache, also known as the Texture Data
+(TD) unit, is responsible for routing data returned from the
+:ref:`vL1D cache RAM <desc-tc>` back to a wavefront on a SIMD. As described in
+the :ref:`vL1D cache front-end <desc-ta>` section, the data-return path is passed
+information about the space requirements and routing for data requests
+from the :ref:`VALU <desc-valu>`. When data is returned from the
+:ref:`vL1D cache RAM <desc-tc>`, it is matched to this previously stored request
+data, and returned to the appropriate SIMD.
+
+Omniperf reports the following vL1D data-return path metrics:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Metric
+
+     - Description
+
+     - Unit
+
+   * - Data-return Busy
+
+     - Percent of the :ref:`total CU cycles <total-cu-cycles>` the data-return
+       unit was busy processing or waiting on data to return to the
+       :doc:`CU <compute-unit>`.
+
+     - Percent
+
+   * - Cache RAM → Data-return Stall
+
+     - Percent of the :ref:`total CU cycles <total-cu-cycles>` the data-return
+       unit was stalled on data to be returned from the
+       :ref:`vL1D Cache RAM <desc-tc>`.
+
+     - Percent
+
+   * - Workgroup manager → Data-return Stall
+
+     - Percent of the :ref:`total CU cycles <total-cu-cycles>` the data-return
+       unit was stalled by the :ref:`workgroup manager <desc-spi>` due to
+       initialization of registers as a part of launching new workgroups.
+
+     - Percent
+
+   * - Coalescable Instructions
+
+     - The number of instructions submitted to the
+       :ref:`data-return unit <desc-td>` by the
+       :ref:`address processor <desc-ta>` that were found to be coalescable, per
+       :ref:`normalization unit <normalization-units>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Read Instructions
+
+     - The number of read instructions submitted to the
+       :ref:`data-return unit <desc-td>` by the
+       :ref:`address processor <desc-ta>` summed over all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`. This is expected to be
+       the sum of global/generic and spill/stack reads in the
+       :ref:`address processor <desc-ta>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Write Instructions
+
+     - The number of store instructions submitted to the
+       :ref:`data-return unit <desc-td>` by the
+       :ref:`address processor <desc-ta>` summed over all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`. This is expected to be
+       the sum of global/generic and spill/stack stores counted by the
+       :ref:`vL1D cache-front-end <ta-instruction-counts>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
+   * - Atomic Instructions
+
+     - The number of atomic instructions submitted to the
+       :ref:`data-return unit <desc-td>` by the
+       :ref:`address processor <desc-ta>` summed over all
+       :doc:`compute units <compute-unit>` on the accelerator, per
+       :ref:`normalization unit <normalization-units>`. This is expected to be
+       the sum of global/generic and spill/stack atomics in the
+       :ref:`address processor <desc-ta>`.
+
+     - Instructions per :ref:`normalization unit <normalization-units>`
+
diff --git a/projects/rocprofiler-compute/docs/conf.py b/projects/rocprofiler-compute/docs/conf.py
new file mode 100644
index 0000000000..b38ce2e5cf
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/conf.py
@@ -0,0 +1,101 @@
+# MIT License
+
+# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+import re
+
+# Take the version from ../VERSION: the first run of digits and dots in the file.
+# The run may be followed by any other character or by end-of-file, so a VERSION
+# file saved without a trailing newline is still accepted.
+with open("../VERSION", encoding="utf-8") as f:
+    match = re.search(r"([0-9.]+)(?:[^0-9.]|$)", f.read())
+    if not match:
+        raise ValueError("VERSION not found!")
+    version_number = match[1]
+
+# project info
+project = "Omniperf"
+author = "Advanced Micro Devices, Inc."
+copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved."
+version = version_number
+release = version_number
+
+# extensions, theme, page title, and paths skipped when collecting sources
+extensions = ["rocm_docs", "sphinx.ext.extlinks", "sphinxcontrib.datatemplates"]
+html_theme = "rocm_docs_theme"
+html_theme_options = {"flavor": "rocm"}
+html_title = f"{project} {version_number} documentation"
+exclude_patterns = ["archive", "*/includes"]
+
+# custom stylesheet, looked up in the static path below
+html_static_path = ["sphinx/static/css"]
+html_css_files = ["o_custom.css"]
+
+# table-of-contents file and project id (presumably read by rocm_docs; confirm)
+external_toc_path = "./sphinx/_toc.yml"
+external_projects_current_project = "omniperf"
+
+# frequently used external resources
+# Each key is a role name mapped to (URL template, caption template); "%s" is
+# replaced by the text given to the role (see sphinx.ext.extlinks).
+extlinks = {
+    "dev-sample": ("https://github.com/ROCm/omniperf/blob/dev/sample/%s", "%s"),
+    "prod-page": (
+        "https://www.amd.com/en/products/accelerators/instinct/%s.html",
+        "%s",
+    ),
+    "llvm-docs": ("https://llvm.org/docs/AMDGPUUsage.html#%s", "%s"),
+    "amd-lab-note": ("https://gpuopen.com/learn/amd-lab-notes/%s", "%s"),
+    "cdna2-white-paper": (
+        "https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf#page=%s",
+        "CDNA2 white paper (page %s)",
+    ),
+    "gcn-crash-course": (
+        "https://www.slideshare.net/DevCentralAMD/gs4106-the-amd-gcn-architecture-a-crash-course-by-layla-mah#%s",
+        "The AMD GCN Architecture - A Crash Course (slide %s)",
+    ),
+    "hip-training-pdf": (
+        "https://www.olcf.ornl.gov/wp-content/uploads/2019/09/AMD_GPU_HIP_training_20190906.pdf#page=%s",
+        "Introduction to AMD GPU Programming with HIP (slide %s)",
+    ),
+    "mantor-gcn-pdf": (
+        "https://old.hotchips.org/wp-content/uploads/hc_archives/hc24/HC24-3-ManyCore/HC24.28.315-AMD.GCN.mantor_v1.pdf#page=%s",
+        "AMD Radeon HD7970 with GCN Architecture (slide %s)",
+    ),
+    "mantor-vega10-pdf": (
+        "https://old.hotchips.org/wp-content/uploads/hc_archives/hc29/HC29.21-Monday-Pub/HC29.21.10-GPU-Gaming-Pub/HC29.21.120-Radeon-Vega10-Mantor-AMD-f1.pdf#page=%s",
+        "AMD Radeon Next Generation GPU Architecture - Vega10 (slide %s)",
+    ),
+    "mi200-isa-pdf": (
+        "https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf#page=%s",
+        "AMD Instinct MI200 ISA Reference Guide (page %s)",
+    ),
+    "hsa-runtime-pdf": (
+        "http://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf#page=%s",
+        "HSA Runtime Programmer's Reference Manual (page %s)",
+    ),
+}
diff --git a/projects/rocprofiler-compute/docs/data/analyze/global_variables.png b/projects/rocprofiler-compute/docs/data/analyze/global_variables.png
new file mode 100644
index 0000000000..87f49b5e14
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/global_variables.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/Current_and_baseline_dispatch_ids.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/Current_and_baseline_dispatch_ids.png
new file mode 100644
index 0000000000..811bf99692
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/Current_and_baseline_dispatch_ids.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/Kernel_time_histogram.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/Kernel_time_histogram.png
new file mode 100644
index 0000000000..8ec0fd83ba
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/Kernel_time_histogram.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/Top_bottleneck_dispatches.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/Top_bottleneck_dispatches.png
new file mode 100644
index 0000000000..31d13a0a2f
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/Top_bottleneck_dispatches.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cpc_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpc_panel.png
new file mode 100644
index 0000000000..7b7f758588
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpc_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cpf_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpf_panel.png
new file mode 100644
index 0000000000..a43b878536
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cpf_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-arith-ops_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-arith-ops_panel.png
new file mode 100644
index 0000000000..073b64d707
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-arith-ops_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-inst-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-inst-mix_panel.png
new file mode 100644
index 0000000000..1b9a6d2b25
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-inst-mix_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png
new file mode 100644
index 0000000000..d74dfd271a
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-pipeline-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-pipeline-stats_panel.png
new file mode 100644
index 0000000000..6f572f9148
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-pipeline-stats_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-sol_panel.png
new file mode 100644
index 0000000000..8e8f46174f
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-value-arith-instr-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-value-arith-instr-mix_panel.png
new file mode 100644
index 0000000000..de3750d2d0
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-value-arith-instr-mix_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-vmem-instr-mix_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-vmem-instr-mix_panel.png
new file mode 100644
index 0000000000..1d6ce1bc46
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/cu-vmem-instr-mix_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-accesses_panel.png
new file mode 100644
index 0000000000..926a7805e7
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-accesses_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-sol_panel.png
new file mode 100644
index 0000000000..64be7178c6
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/instr-cache-sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-accesses_panel.png
new file mode 100644
index 0000000000..101cf77530
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-accesses_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-interface-stalls_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-interface-stalls_panel.png
new file mode 100644
index 0000000000..b1bd415ca3
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-interface-stalls_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-transactions_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-transactions_panel.png
new file mode 100644
index 0000000000..7df5a78095
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-fabric-transactions_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-per-channel-agg-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-per-channel-agg-stats_panel.png
new file mode 100644
index 0000000000..704d45c69f
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-per-channel-agg-stats_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-sol_panel.png
new file mode 100644
index 0000000000..646e608cbc
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/l2-sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-sol_panel.png
new file mode 100644
index 0000000000..c261513aa9
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-stats_panel.png
new file mode 100644
index 0000000000..0d9d419eb7
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/lds-stats_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/memory-chart_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/memory-chart_panel.png
new file mode 100644
index 0000000000..1091a50329
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/memory-chart_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/roofline_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/roofline_panel.png
new file mode 100644
index 0000000000..47ee9bddb1
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/roofline_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-cache-accesses_panel.png
new file mode 100644
index 0000000000..3605cce8a2
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-cache-accesses_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-l12-interface_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-l12-interface_panel.png
new file mode 100644
index 0000000000..5c3480ac9f
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-l12-interface_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-sol_panel.png
new file mode 100644
index 0000000000..92fa5a1a4a
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sl1d-sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/sol_panel.png
new file mode 100644
index 0000000000..f456500e02
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-resource-allocation_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-resource-allocation_panel.png
new file mode 100644
index 0000000000..bee869ad10
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-resource-allocation_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-stats_panel.png
new file mode 100644
index 0000000000..19c7ad3645
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/spi-stats_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/system-info_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/system-info_panel.png
new file mode 100644
index 0000000000..5a5fa01187
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/system-info_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/ta_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/ta_panel.png
new file mode 100644
index 0000000000..2f08f9a6b1
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/ta_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/td_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/td_panel.png
new file mode 100644
index 0000000000..819407515b
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/td_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/top-stat_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/top-stat_panel.png
new file mode 100644
index 0000000000..5e3dddca2f
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/top-stat_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-addr-translation_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-addr-translation_panel.png
new file mode 100644
index 0000000000..0fb4aaf076
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-addr-translation_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-accesses_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-accesses_panel.png
new file mode 100644
index 0000000000..5259b2214f
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-accesses_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-stalls_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-stalls_panel.png
new file mode 100644
index 0000000000..61e09c915c
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-cache-stalls_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-l2-transactions_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-l2-transactions_panel.png
new file mode 100644
index 0000000000..51875e516c
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-l2-transactions_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-sol_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-sol_panel.png
new file mode 100644
index 0000000000..5c2485d0d7
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/vl1d-sol_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-launch-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-launch-stats_panel.png
new file mode 100644
index 0000000000..38e4517f33
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-launch-stats_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-runtime-stats_panel.png b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-runtime-stats_panel.png
new file mode 100644
index 0000000000..517d461d31
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/grafana/wavefront-runtime-stats_panel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/analyze/standalone_gui.png b/projects/rocprofiler-compute/docs/data/analyze/standalone_gui.png
new file mode 100644
index 0000000000..a8abd81694
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/standalone_gui.png differ
diff --git a/projects/rocprofiler-compute/docs/data/faq/tunnel_demo1.png b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo1.png
new file mode 100644
index 0000000000..bda64883c4
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo1.png differ
diff --git a/projects/rocprofiler-compute/docs/data/faq/tunnel_demo2.png b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo2.png
new file mode 100644
index 0000000000..8b2d258521
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo2.png differ
diff --git a/projects/rocprofiler-compute/docs/data/faq/tunnel_demo3.png b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo3.png
new file mode 100644
index 0000000000..76cd7ed9a9
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/faq/tunnel_demo3.png differ
diff --git a/projects/rocprofiler-compute/docs/data/install/datasource_config.jpg b/projects/rocprofiler-compute/docs/data/install/datasource_config.jpg
new file mode 100644
index 0000000000..4210d9036b
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/datasource_config.jpg differ
diff --git a/projects/rocprofiler-compute/docs/data/install/datasource_settings.jpg b/projects/rocprofiler-compute/docs/data/install/datasource_settings.jpg
new file mode 100644
index 0000000000..f472362544
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/datasource_settings.jpg differ
diff --git a/projects/rocprofiler-compute/docs/data/install/grafana_welcome.png b/projects/rocprofiler-compute/docs/data/install/grafana_welcome.png
new file mode 100644
index 0000000000..e564c0a389
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/grafana_welcome.png differ
diff --git a/projects/rocprofiler-compute/docs/data/install/grafana_workload_selection.png b/projects/rocprofiler-compute/docs/data/install/grafana_workload_selection.png
new file mode 100644
index 0000000000..3ecdc35e72
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/grafana_workload_selection.png differ
diff --git a/projects/rocprofiler-compute/docs/data/install/import_dashboard.png b/projects/rocprofiler-compute/docs/data/install/import_dashboard.png
new file mode 100644
index 0000000000..29be7ea584
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/import_dashboard.png differ
diff --git a/projects/rocprofiler-compute/docs/data/install/install-decision-tree.png b/projects/rocprofiler-compute/docs/data/install/install-decision-tree.png
new file mode 100644
index 0000000000..1c62fba87b
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/install-decision-tree.png differ
diff --git a/projects/rocprofiler-compute/docs/data/install/omniperf_server_vs_client_install.png b/projects/rocprofiler-compute/docs/data/install/omniperf_server_vs_client_install.png
new file mode 100644
index 0000000000..8c43dba9e2
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/omniperf_server_vs_client_install.png differ
diff --git a/projects/rocprofiler-compute/docs/data/install/opening_dashboard.png b/projects/rocprofiler-compute/docs/data/install/opening_dashboard.png
new file mode 100644
index 0000000000..5e6c7ea625
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/install/opening_dashboard.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/fabric.png b/projects/rocprofiler-compute/docs/data/performance-model/fabric.png
new file mode 100644
index 0000000000..826b4d9de7
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/fabric.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/fabric.svg b/projects/rocprofiler-compute/docs/data/performance-model/fabric.svg
new file mode 100644
index 0000000000..516854843a
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/performance-model/fabric.svg
@@ -0,0 +1,899 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="122.17907mm"
+   height="59.61977mm"
+   viewBox="0 0 122.17907 59.61977"
+   version="1.1"
+   id="svg5"
+   sodipodi:docname="fabric.svg"
+   inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
+   inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/fabric.png"
+   inkscape:export-xdpi="180"
+   inkscape:export-ydpi="180"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview142"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:document-units="mm"
+     showgrid="false"
+     inkscape:zoom="2.8284271"
+     inkscape:cx="251.55324"
+     inkscape:cy="153.61895"
+     inkscape:window-width="2490"
+     inkscape:window-height="1376"
+     inkscape:window-x="70"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer1"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0" />
+  <defs
+     id="defs2">
+    <marker
+       style="overflow:visible"
+       id="marker10337"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path10335" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Send"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.2,0,0,-0.2,-1.2,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1319" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313" />
+    </marker>
+    <rect
+       x="83.387405"
+       y="34.737923"
+       width="275.12451"
+       height="59.135487"
+       id="rect97993" />
+    <rect
+       x="428.84567"
+       y="387.25235"
+       width="209.04202"
+       height="89.898453"
+       id="rect64747" />
+    <rect
+       x="396.33554"
+       y="373.98026"
+       width="250.97929"
+       height="100.31375"
+       id="rect58746" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20534" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lstart"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(0.8,0,0,0.8,10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20531" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328" />
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0" />
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6" />
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-3" />
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-3" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend-7"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20534-3" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend-7-9"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20534-3-2" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend-7-9-7"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20534-3-2-6" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-3-0" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend-7-7"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20534-3-0" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend-7-7-2"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path20534-3-0-1" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2" />
+    <rect
+       x="83.387405"
+       y="34.737923"
+       width="275.12451"
+       height="59.135487"
+       id="rect97993-7" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-3"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-5" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-3-2"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-5-9" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2" />
+    <rect
+       x="83.387405"
+       y="34.737923"
+       width="275.12451"
+       height="59.135487"
+       id="rect97993-7-7" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-36"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-7" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2-5" />
+    <rect
+       x="83.387405"
+       y="34.737923"
+       width="275.12451"
+       height="59.135487"
+       id="rect97993-7-7-6" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-36-0"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-7-9" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2-5-3" />
+    <rect
+       x="83.387405"
+       y="34.737923"
+       width="275.12451"
+       height="59.135487"
+       id="rect97993-7-7-6-6" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-3-2-9"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-5-9-2" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-36-0-3"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-7-9-7" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2-5-3-3" />
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2-5-3-3-6" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-36-0-2"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-7-9-70" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-36-3"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-7-6" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-367"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-53" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-6"
+       refX="0"
+       refY="0"
+       orient="auto">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path1313-2" />
+    </marker>
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2-5-3-6" />
+    <rect
+       x="97.144325"
+       y="226.69614"
+       width="148.65651"
+       height="56.321774"
+       id="rect6328-0-6-8-2-2-5-3-3-1" />
+  </defs>
+  <g
+     id="layer1"
+     transform="translate(-3.1044175,-14.33817)">
+    <g
+       id="g3747"
+       transform="translate(0,0.18150994)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.136377;stroke-opacity:1"
+         id="rect31"
+         width="20.273424"
+         height="7.6810331"
+         x="3.172606"
+         y="40.275291" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.13637766,0,0,0.13637766,-10.07571,10.507238)"
+         id="text6326"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328);fill:#000000;fill-opacity:1;stroke:none"
+         x="45.007812"
+         y="0"><tspan
+           x="126.48828"
+           y="240.85156"
+           id="tspan43284"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43282">Total Fabric </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43288"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43286">Requests</tspan></tspan></text>
+    </g>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 23.417828,44.182431 h 7.744823"
+       id="path20529"
+       sodipodi:nodetypes="cc" />
+    <g
+       id="g3739"
+       transform="translate(10.059995,2.72298)">
+      <rect
+         style="opacity:0.28;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.136377;stroke-opacity:1"
+         id="rect31-4"
+         width="20.273424"
+         height="7.6810331"
+         x="31.404562"
+         y="28.797853" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.13637766,0,0,0.13637766,18.156244,-0.9701983)"
+         id="text6326-8"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0);opacity:0.28;fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="133.16016"
+           y="240.85156"
+           id="tspan43292"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43290">32B Read </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43296"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43294">Requests</tspan></tspan></text>
+    </g>
+    <g
+       id="g204"
+       transform="translate(10.059995)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.136377;stroke-opacity:1"
+         id="rect31-4-1"
+         width="20.273424"
+         height="7.6810331"
+         x="31.404562"
+         y="14.407581" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.13637766,0,0,0.13637766,18.156246,-15.360471)"
+         id="text6326-8-2"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="133.16016"
+           y="240.85156"
+           id="tspan43300"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43298">64B Read </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43304"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43302">Requests</tspan></tspan></text>
+    </g>
+    <g
+       id="g33085"
+       transform="translate(0,0.26317766)">
+      <rect
+         style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.136;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+         id="rect31-4-6"
+         width="20.273424"
+         height="7.6810331"
+         x="41.464558"
+         y="48.84304" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.13637766,0,0,0.13637766,28.216241,19.074987)"
+         id="text6326-8-3"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-3);opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.997231;stroke-miterlimit:4;stroke-dasharray:none"
+         x="158.11719"
+         y="0"><tspan
+           x="132.14062"
+           y="240.85156"
+           id="tspan43308"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43306">32B Write </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43312"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43310">Requests</tspan></tspan></text>
+    </g>
+    <text
+       xml:space="preserve"
+       transform="matrix(0.13637766,0,0,0.13637766,-0.0308419,9.5190479)"
+       id="text58744"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect58746);fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.997231;stroke-miterlimit:4;stroke-dasharray:none" />
+    <text
+       xml:space="preserve"
+       transform="matrix(0.13637766,0,0,0.13637766,-0.0308419,9.5190479)"
+       id="text64745"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect64747);fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.997231;stroke-miterlimit:4;stroke-dasharray:none" />
+    <g
+       id="g54852"
+       transform="matrix(0.51544312,0,0,0.51544312,28.426265,12.492382)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583;stroke-opacity:1"
+         id="rect31-4-1-0"
+         width="39.332031"
+         height="14.901806"
+         x="25.295307"
+         y="104.1685" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.26458333,0,0,0.26458333,-0.40746419,46.416146)"
+         id="text6326-8-2-2"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-3);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="132.14062"
+           y="240.85156"
+           id="tspan43316"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43314">64B Write </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43320"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43318">Requests</tspan></tspan></text>
+    </g>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 51.601269,66.186146 V 61.556441"
+       id="path69447-9-7-5-2-3-93-6" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="M 31.07042,44.609772 V 26.783617"
+       id="path69449" />
+    <g
+       id="g6799"
+       transform="translate(-0.85908839)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.136377;stroke-opacity:1"
+         id="rect31-4-1-7"
+         width="20.273424"
+         height="7.6810331"
+         x="73.9366"
+         y="14.407581" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.13637766,0,0,0.13637766,60.688286,-15.360471)"
+         id="text6326-8-2-8"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="109.13281"
+           y="240.85156"
+           id="tspan43326"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43322">Uncached </tspan><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43324">Read </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43330"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43328">Requests</tspan></tspan></text>
+    </g>
+    <text
+       xml:space="preserve"
+       transform="matrix(0.26458333,0,0,0.26458333,23.273945,0.03952421)"
+       id="text97991"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect97993);fill:#000000;fill-opacity:1;stroke:none" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:2.64068px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0660172"
+       x="65.804695"
+       y="17.049156"
+       id="text104993"><tspan
+         id="tspan104991"
+         style="stroke-width:0.0660172"
+         x="65.804695"
+         y="17.049156">x2</tspan></text>
+    <g
+       id="g23519"
+       transform="translate(-8.0320305,-0.27460261)">
+      <g
+         id="g33659">
+        <rect
+           style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.136377;stroke-opacity:1"
+           id="rect31-4-1-7-2"
+           width="20.273424"
+           height="7.6810331"
+           x="81.756866"
+           y="66.459923" />
+        <text
+           xml:space="preserve"
+           transform="matrix(0.13637766,0,0,0.13637766,68.508552,36.707843)"
+           id="text6326-8-2-8-6"
+           style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8-2);fill:#000000;fill-opacity:1;stroke:none"
+           x="158.11719"
+           y="0"><tspan
+             x="108.11328"
+             y="240.85156"
+             id="tspan43336"><tspan
+               style="text-align:center;text-anchor:middle"
+               id="tspan43332">Uncached </tspan><tspan
+               style="text-align:center;text-anchor:middle"
+               id="tspan43334">Write </tspan></tspan><tspan
+             x="134.80859"
+             y="260.85156"
+             id="tspan43340"><tspan
+               style="text-align:center;text-anchor:middle"
+               id="tspan43338">Requests</tspan></tspan></text>
+      </g>
+    </g>
+    <text
+       xml:space="preserve"
+       transform="matrix(0.26458333,0,0,0.26458333,23.273945,52.107839)"
+       id="text97991-3"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect97993-7);fill:#000000;fill-opacity:1;stroke:none" />
+    <g
+       id="g10025"
+       transform="matrix(0.51544312,0,0,0.51544312,23.359942,44.057639)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583;stroke-opacity:1"
+         id="rect31-4-1-7-2-9"
+         width="39.332031"
+         height="14.901806"
+         x="81.170959"
+         y="9.7946386" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.26458333,0,0,0.26458333,55.363782,-47.957714)"
+         id="text6326-8-2-8-6-3"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8-2-2);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="141.15234"
+           y="240.85156"
+           id="tspan43344"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43342">Atomic 
+</tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43348"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43346">Requests</tspan></tspan></text>
+    </g>
+    <text
+       xml:space="preserve"
+       transform="matrix(0.26458333,0,0,0.26458333,23.273945,37.671054)"
+       id="text97991-3-6"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect97993-7-7);fill:#000000;fill-opacity:1;stroke:none" />
+    <text
+       xml:space="preserve"
+       transform="matrix(0.26458333,0,0,0.26458333,23.273945,66.483549)"
+       id="text97991-3-6-7"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect97993-7-7-6);fill:#000000;fill-opacity:1;stroke:none" />
+    <g
+       id="g10025-9-6"
+       transform="matrix(0.51544312,0,0,0.51544312,62.851448,9.3577801)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583;stroke-opacity:1"
+         id="rect31-4-1-7-2-9-1-1"
+         width="39.332031"
+         height="14.901806"
+         x="81.170959"
+         y="9.7946386" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.26458333,0,0,0.26458333,55.363782,-47.957714)"
+         id="text6326-8-2-8-6-3-2-8"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8-2-2-5-3);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="130.42188"
+           y="240.85156"
+           id="tspan43352"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43350">HBM Read
+</tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43356"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43354">Requests</tspan></tspan></text>
+    </g>
+    <text
+       xml:space="preserve"
+       transform="matrix(0.26458333,0,0,0.26458333,23.273945,14.460151)"
+       id="text97991-3-6-7-7"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect97993-7-7-6-6);fill:#000000;fill-opacity:1;stroke:none" />
+    <g
+       id="g13641"
+       transform="matrix(0.51544312,0,0,0.51544312,62.851448,26.472254)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583;stroke-opacity:1"
+         id="rect31-4-1-7-2-9-1-1-7"
+         width="39.332031"
+         height="14.901806"
+         x="81.170959"
+         y="9.7946386" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.26458333,0,0,0.26458333,55.363782,-47.957714)"
+         id="text6326-8-2-8-6-3-2-8-5"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8-2-2-5-3-3);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="117.96094"
+           y="240.85156"
+           id="tspan43360"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43358">Remote Read
+</tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43364"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43362">Requests</tspan></tspan></text>
+    </g>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:0.252, 0.252;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow1Mend-36-0)"
+       d="M 61.835501,17.827625 H 72.628293"
+       id="path69449-1-4-5-5-0" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="M 31.07042,44.18384 V 61.525736"
+       id="path69449-9" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.251943;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="M 30.944463,26.804724 H 114.94704"
+       id="path69449-3" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 51.601269,28.986028 V 26.930672"
+       id="path69447-9-7-5-2" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="m 52.807545,30.383236 -1.17844,-1.410071"
+       id="path69447-9-7-5-6-7" />
+    <ellipse
+       style="fill:#ffffff;stroke:#000000;stroke-width:0.0583236;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="path2455-3-6-3-1"
+       cx="-51.593498"
+       cy="28.936687"
+       rx="0.26853767"
+       ry="0.29816741"
+       transform="scale(-1,1)" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.3"
+       d="M 51.601269,31.456218 V 30.329011"
+       id="path69447-9-7-5-2-5" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 51.601269,26.688316 V 22.058611"
+       id="path69447-9-7-5-2-3" />
+    <ellipse
+       style="fill:#ffffff;stroke:#000000;stroke-width:0.107;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="path2455-3-6-3-6"
+       cx="51.61137"
+       cy="28.97942"
+       rx="0.49265727"
+       ry="0.54701579" />
+    <ellipse
+       style="fill:#ffffff;stroke:#000000;stroke-width:0.107;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="path2455-3-6-3-6-2"
+       cx="52.914024"
+       cy="30.493517"
+       rx="0.49265727"
+       ry="0.54701579" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 114.82717,31.453752 V 22.110106"
+       id="path69447-9-7-5-2-3-9" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252188;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 30.9454,61.491997 h 84.16591"
+       id="path69449-3-1" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 51.601269,61.398519 V 56.768814"
+       id="path69447-9-7-5-2-3-93" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 75.335667,61.434975 V 56.80527"
+       id="path69447-9-7-5-2-3-93-0" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 83.861548,66.11139 V 61.481685"
+       id="path69447-9-7-5-2-3-93-62" />
+    <g
+       id="g10025-9-6-8"
+       transform="matrix(0.51544312,0,0,0.51544312,63.102859,44.045664)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583;stroke-opacity:1"
+         id="rect31-4-1-7-2-9-1-1-79"
+         width="39.332031"
+         height="14.901806"
+         x="81.170959"
+         y="9.7946386" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.26458333,0,0,0.26458333,55.363782,-47.957714)"
+         id="text6326-8-2-8-6-3-2-8-2"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8-2-2-5-3-6);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="129.40234"
+           y="240.85156"
+           id="tspan43368"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43366">HBM Write </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43372"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43370">Requests</tspan></tspan></text>
+    </g>
+    <g
+       id="g13641-0"
+       transform="matrix(0.51544312,0,0,0.51544312,63.102859,61.160138)">
+      <rect
+         style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583;stroke-opacity:1"
+         id="rect31-4-1-7-2-9-1-1-7-2"
+         width="39.332031"
+         height="14.901806"
+         x="81.170959"
+         y="9.7946386" />
+      <text
+         xml:space="preserve"
+         transform="matrix(0.26458333,0,0,0.26458333,55.363782,-47.957714)"
+         id="text6326-8-2-8-6-3-2-8-5-3"
+         style="font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect6328-0-6-8-2-2-5-3-3-1);fill:#000000;fill-opacity:1;stroke:none"
+         x="158.11719"
+         y="0"><tspan
+           x="116.94141"
+           y="240.85156"
+           id="tspan43376"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43374">Remote Write </tspan></tspan><tspan
+           x="134.80859"
+           y="260.85156"
+           id="tspan43380"><tspan
+             style="text-align:center;text-anchor:middle"
+             id="tspan43378">Requests</tspan></tspan></text>
+    </g>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       d="M 115.07858,66.141636 V 56.79799"
+       id="path69447-9-7-5-2-3-9-7" />
+  </g>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/gcn_compute_unit.png b/projects/rocprofiler-compute/docs/data/performance-model/gcn_compute_unit.png
new file mode 100644
index 0000000000..e6c1f2eb07
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/gcn_compute_unit.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.png b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.png
new file mode 100644
index 0000000000..fdabfbb955
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.svg b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.svg
new file mode 100644
index 0000000000..dd22a71319
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/performance-model/l1perf_model.svg
@@ -0,0 +1,584 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="127.5mm"
+   height="32.5mm"
+   viewBox="0 0 127.5 32.5"
+   version="1.1"
+   id="svg8"
+   inkscape:version="0.92.5 (2060ec1f9f, 2020-04-08)"
+   sodipodi:docname="l1perf_model2.svg"
+   inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/l1perf_model.png"
+   inkscape:export-xdpi="96"
+   inkscape:export-ydpi="96">
+  <defs
+     id="defs2">
+    <marker
+       inkscape:stockid="Arrow2Mend"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow2Mend"
+       style="overflow:visible"
+       inkscape:isstock="true">
+      <path
+         id="path4559"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         transform="scale(-0.6)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Mstart"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow2Mstart"
+       style="overflow:visible"
+       inkscape:isstock="true">
+      <path
+         id="path4556"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         transform="scale(0.6)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Lstart"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow2Lstart"
+       style="overflow:visible"
+       inkscape:isstock="true">
+      <path
+         id="path4550"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         transform="matrix(1.1,0,0,1.1,1.1,0)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:isstock="true"
+       style="overflow:visible"
+       id="marker4982"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Lend">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path4980"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow1Lend"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow1Lend-3"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
+      <path
+         inkscape:connector-curvature="0"
+         id="path4535-6"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="matrix(-0.8,0,0,-0.8,-10,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Lstart"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow2Lstart-8"
+       style="overflow:visible"
+       inkscape:isstock="true">
+      <path
+         id="path4550-1"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         transform="matrix(1.1,0,0,1.1,1.1,0)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Lstart"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow2Lstart-0"
+       style="overflow:visible"
+       inkscape:isstock="true">
+      <path
+         id="path4550-3"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         transform="matrix(1.1,0,0,1.1,1.1,0)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow1Lend"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow1Lend-3-4"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
+      <path
+         inkscape:connector-curvature="0"
+         id="path4535-6-0"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="matrix(-0.8,0,0,-0.8,-10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="5.6"
+     inkscape:cx="293.09582"
+     inkscape:cy="20.610939"
+     inkscape:document-units="mm"
+     inkscape:current-layer="layer1"
+     showgrid="false"
+     inkscape:window-width="2488"
+     inkscape:window-height="1376"
+     inkscape:window-x="72"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     showguides="true" />
+  <metadata
+     id="metadata5">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(0,-264.5)">
+    <rect
+       id="rect3717"
+       width="12.756697"
+       height="31.844492"
+       x="0.16001961"
+       y="264.8248"
+       style="fill:none;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.82222223px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="-280.79251"
+       y="7.3169599"
+       id="text4526"
+       transform="rotate(-90)"><tspan
+         sodipodi:role="line"
+         id="tspan1391"
+         x="-280.79251"
+         y="7.3169599">Compute Unit</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="19.342445"
+       y="269.90857"
+       id="text4816"><tspan
+         sodipodi:role="line"
+         x="19.342445"
+         y="269.90857"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.26458332"
+         id="tspan4818">Cmd/Data</tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.26753739;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4843"
+       width="52.205414"
+       height="31.841953"
+       x="37.515408"
+       y="264.82605"
+       ry="0"
+       rx="0" />
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.26433226;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847"
+       width="12.880525"
+       height="12.567707"
+       x="37.579292"
+       y="264.8248"
+       rx="1.0342027"
+       ry="0.37797582" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:5.28136396px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.13203409"
+       x="44.815056"
+       y="19.832005"
+       id="text4851"
+       transform="scale(1.3075045,0.76481574)"><tspan
+         sodipodi:role="line"
+         x="44.815056"
+         y="24.504774"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;line-height:0;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.13203409"
+         id="tspan4857" /></text>
+    <flowRoot
+       xml:space="preserve"
+       id="flowRoot4859"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none"><flowRegion
+         id="flowRegion4861"><rect
+           id="rect4863"
+           width="71.071426"
+           height="43.57143"
+           x="195.35715"
+           y="38.59111" /></flowRegion><flowPara
+         id="flowPara4865" /></flowRoot>    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.25103042"
+       x="40.514164"
+       y="264.33435"
+       id="text1823"
+       transform="scale(0.98186128,1.0184738)"><tspan
+         sodipodi:role="line"
+         x="40.514164"
+         y="264.33435"
+         id="tspan1813"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25103042"><tspan
+           x="40.514164"
+           y="264.33435"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25103042"
+           id="tspan1811">Address </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="39.07756"
+         y="266.98016"
+         id="tspan1817"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25103042"><tspan
+           x="39.07756"
+           y="266.98016"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25103042"
+           id="tspan1815">Processing </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="42.574501"
+         y="269.62601"
+         id="tspan1821"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25103042"><tspan
+           x="42.574501"
+           y="269.62601"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666679px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25103042"
+           id="tspan1819">Unit</tspan></tspan></text>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.21152194;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Arrow2Mstart);marker-end:url(#Arrow2Mend)"
+       d="m 43.986979,277.84477 v 5.7019"
+       id="path4904"
+       inkscape:connector-curvature="0" />
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847-2"
+       width="12.945681"
+       height="12.567707"
+       x="37.514137"
+       y="284.10156"
+       rx="1.0394342"
+       ry="0.37797582" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="-283.25629"
+       y="42.902218"
+       id="text5371"
+       transform="rotate(-90)"><tspan
+         sodipodi:role="line"
+         id="tspan5369"
+         x="-283.25629"
+         y="42.902218"
+         style="font-size:2.11666656px;stroke-width:0.26458332">Sync</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:1.25406051px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.2351373"
+       x="41.356129"
+       y="288.49612"
+       id="text940"><tspan
+         sodipodi:role="line"
+         x="41.356129"
+         y="288.49612"
+         id="tspan930"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.2351373"><tspan
+           x="41.356129"
+           y="288.49612"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.2351373"
+           id="tspan928">Data </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="38.231773"
+         y="291.14197"
+         id="tspan934"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.2351373"><tspan
+           x="38.231773"
+           y="291.14197"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.2351373"
+           id="tspan932">Processing </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="41.728718"
+         y="293.78778"
+         id="tspan938"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.2351373"><tspan
+           x="41.728718"
+           y="293.78778"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.2351373"
+           id="tspan936">Unit</tspan></tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847-6"
+       width="12.945681"
+       height="12.567707"
+       x="51.913925"
+       y="264.8251"
+       rx="1.0394342"
+       ry="0.37797582" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.25566792"
+       x="53.571041"
+       y="267.50827"
+       id="text1809"><tspan
+         sodipodi:role="line"
+         x="53.571041"
+         y="267.50827"
+         id="tspan1795"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"><tspan
+           x="53.571041"
+           y="267.50827"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"
+           id="tspan1793">Virtual To </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="54.174622"
+         y="270.15408"
+         id="tspan1799"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"><tspan
+           x="54.174622"
+           y="270.15408"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"
+           id="tspan1797">Physical </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="54.265057"
+         y="272.79993"
+         id="tspan1803"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"><tspan
+           x="54.265057"
+           y="272.79993"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"
+           id="tspan1801">Address </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="52.713211"
+         y="275.44577"
+         id="tspan1807"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"><tspan
+           x="52.713211"
+           y="275.44577"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.11666656px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"
+           id="tspan1805">Translation</tspan></tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847-6-4"
+       width="12.945681"
+       height="12.567707"
+       x="66.313728"
+       y="264.8251"
+       rx="1.0394342"
+       ry="0.37797582" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:1.36356223px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.25566792"
+       x="72.785965"
+       y="270.58688"
+       id="text1599"><tspan
+         sodipodi:role="line"
+         id="tspan1825"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;writing-mode:lr-tb;text-anchor:middle"
+         x="73.178444"
+         y="270.58688">Tag </tspan><tspan
+         sodipodi:role="line"
+         id="tspan1827"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;writing-mode:lr-tb;text-anchor:middle"
+         x="72.785965"
+         y="273.67368">RAM</tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.25661802;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847-6-4-5"
+       width="27.324272"
+       height="5.5836082"
+       x="51.880585"
+       y="277.95538"
+       rx="2.1939197"
+       ry="0.16792791" />
+    <flowRoot
+       xml:space="preserve"
+       id="flowRoot4896-6-6-6"
+       style="font-style:normal;font-weight:normal;font-size:5.33333349px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none"
+       transform="matrix(0.25566792,0,0,0.25566792,21.981703,13.242276)"><flowRegion
+         id="flowRegion4898-4-2-9"
+         style="font-size:5.33333349px"><rect
+           id="rect4900-0-6-8"
+           width="74.800339"
+           height="17.377857"
+           x="220.71429"
+           y="50.733971"
+           style="font-size:5.33333349px" /></flowRegion><flowPara
+         style="font-size:9.24484825px;text-align:center;text-anchor:middle"
+         id="flowPara5633-2" /></flowRoot>    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:1.36355662px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.25566792"
+       x="59.399857"
+       y="280.12439"
+       id="text914"><tspan
+         sodipodi:role="line"
+         x="59.399857"
+         y="280.12439"
+         id="tspan908"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"><tspan
+           x="59.399857"
+           y="280.12439"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"
+           id="tspan906">L1 Cache </tspan></tspan><tspan
+         sodipodi:role="line"
+         x="59.399857"
+         y="283.21121"
+         id="tspan912"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"><tspan
+           x="59.399857"
+           y="283.21121"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.25566792"
+           id="tspan910">Controller</tspan></tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.25977203;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847-6-4-5-7"
+       width="27.322025"
+       height="12.587595"
+       x="51.882832"
+       y="284.0853"
+       rx="2.1937392"
+       ry="0.37857395" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944444px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.25566792;"
+       x="61.700409"
+       y="289.7738"
+       id="text926"><tspan
+         sodipodi:role="line"
+         x="61.700409"
+         y="289.7738"
+         id="tspan920"
+         style="stroke-width:0.25566792;-inkscape-font-specification:'sans-serif, Normal';font-family:sans-serif;font-weight:normal;font-style:normal;font-stretch:normal;font-variant:normal;font-size:2.46944444px;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;"><tspan
+           x="61.700409"
+           y="289.7738"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944444px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr;text-anchor:start;stroke-width:0.25566792;"
+           id="tspan916">Cache</tspan><tspan
+           dx="0"
+           x="69.380043"
+           y="289.7738"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944444px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr;text-anchor:start;stroke-width:0.25566792;"
+           id="tspan918" /></tspan><tspan
+         sodipodi:role="line"
+         x="62.822392"
+         y="292.8606"
+         id="tspan924"
+         style="stroke-width:0.25566792;-inkscape-font-specification:'sans-serif, Normal';font-family:sans-serif;font-weight:normal;font-style:normal;font-stretch:normal;font-variant:normal;font-size:2.46944444px;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;"><tspan
+           x="62.822392"
+           y="292.8606"
+           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944444px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr;text-anchor:start;stroke-width:0.25566792;"
+           id="tspan922">RAM</tspan></tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.35042927;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect4847-6-4-5-1"
+       width="8.9711571"
+       height="31.713299"
+       x="80.756233"
+       y="264.86783"
+       rx="0.72031188"
+       ry="0.95378256" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.36361098px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="-280.84842"
+       y="85.92308"
+       id="text4526-5"
+       transform="rotate(-90)"><tspan
+         sodipodi:role="line"
+         id="tspan5786"
+         x="-280.84842"
+         y="85.92308"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;writing-mode:lr-tb;text-anchor:middle">L2 Memory Interface</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="22.10359"
+       y="289.48212"
+       id="text4816-5"><tspan
+         sodipodi:role="line"
+         x="22.10359"
+         y="289.48212"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.26458332"
+         id="tspan4818-8">Data</tspan></text>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.17609379;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow1Lend-3)"
+       d="M 90.208048,280.72448 H 114.26491"
+       id="path4530-75"
+       inkscape:connector-curvature="0" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="99.92643"
+       y="278.60959"
+       id="text4816-3"><tspan
+         sodipodi:role="line"
+         x="99.92643"
+         y="278.60959"
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.46944451px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.26458332"
+         id="tspan4818-5">Bus</tspan></text>
+    <rect
+       id="rect3717-6"
+       width="12.756697"
+       height="31.844494"
+       x="114.5292"
+       y="264.75391"
+       style="fill:none;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.36361122px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
+       x="-280.73953"
+       y="121.95969"
+       id="text4526-5-2"
+       transform="rotate(-90)"><tspan
+         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.82222223px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;writing-mode:lr-tb;text-anchor:middle;stroke-width:0.26458332"
+         sodipodi:role="line"
+         id="tspan5786-9"
+         x="-280.73953"
+         y="121.95969">L2 Cache</tspan></text>
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.17609379;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Arrow2Lstart-8)"
+       d="m 13.179534,290.78598 h 24.05686"
+       id="path4530-75-1"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.17696008;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1Lend-3-4)"
+       d="M 13.04999,271.40894 H 37.370029"
+       id="path4530-75-3"
+       inkscape:connector-curvature="0" />
+  </g>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/lds.png b/projects/rocprofiler-compute/docs/data/performance-model/lds.png
new file mode 100644
index 0000000000..f444eaf539
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/lds.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/lds.svg b/projects/rocprofiler-compute/docs/data/performance-model/lds.svg
new file mode 100644
index 0000000000..c0adb5e912
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/performance-model/lds.svg
@@ -0,0 +1,393 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="78.030128mm"
+   height="59.956924mm"
+   viewBox="0 0 78.030128 59.956924"
+   version="1.1"
+   id="svg5"
+   inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
+   sodipodi:docname="lds.svg"
+   inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/lds.png"
+   inkscape:export-xdpi="180"
+   inkscape:export-ydpi="180"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview7"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:document-units="mm"
+     showgrid="false"
+     inkscape:zoom="2.8284271"
+     inkscape:cx="7.9549513"
+     inkscape:cy="93.161318"
+     inkscape:window-width="2490"
+     inkscape:window-height="1376"
+     inkscape:window-x="70"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer1"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0" />
+  <defs
+     id="defs2">
+    <marker
+       style="overflow:visible"
+       id="marker35467"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Mend"
+       inkscape:isstock="true">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path35465" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend"
+       inkscape:isstock="true">
+      <path
+         transform="scale(-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
+         id="path34770" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker35449"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Mend"
+       inkscape:isstock="true">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path35447" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Mend"
+       inkscape:isstock="true">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path34752" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker35115"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mstart"
+       inkscape:isstock="true">
+      <path
+         transform="scale(0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
+         id="path35113" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mstart"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mstart"
+       inkscape:isstock="true">
+      <path
+         transform="scale(0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
+         id="path34767" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lstart"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Lstart"
+       inkscape:isstock="true">
+      <path
+         transform="matrix(0.8,0,0,0.8,10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path34743" />
+    </marker>
+    <rect
+       x="175.93893"
+       y="253.99336"
+       width="234.12074"
+       height="100.54605"
+       id="rect3930" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend-5"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Mend"
+       inkscape:isstock="true">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path34752-6" />
+    </marker>
+  </defs>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-38.921551,-41.961155)">
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none"
+       id="rect846"
+       width="77.212677"
+       height="40.499405"
+       x="39.330276"
+       y="61.168674" />
+    <text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text3928"
+       style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect3930);fill:#000000;fill-opacity:1;stroke:none" />
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none"
+       id="rect846-3"
+       width="31.886179"
+       height="13.072078"
+       x="39.171551"
+       y="42.211155" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+       x="47.24173"
+       y="49.892689"
+       id="text25740"><tspan
+         sodipodi:role="line"
+         id="tspan25738"
+         style="font-size:3.52778px;stroke-width:0.264583"
+         x="47.24173"
+         y="49.892689">SIMD 0/1</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+       x="92.864143"
+       y="49.892689"
+       id="text25740-7"><tspan
+         sodipodi:role="line"
+         id="tspan25738-5"
+         style="font-size:3.52778px;stroke-width:0.264583"
+         x="92.864143"
+         y="49.892689">SIMD 2/3</tspan></text>
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none"
+       id="rect846-3-3"
+       width="31.886179"
+       height="13.072078"
+       x="84.815498"
+       y="42.211155" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.239054px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#marker35115);marker-end:url(#Arrow2Mend)"
+       d="m 55.11464,55.888713 v 4.575516"
+       id="path35236" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:0.239437px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#marker35115);marker-end:url(#Arrow2Mend)"
+       d="m 100.75859,55.863706 v 4.584345"
+       id="path35236-2" />
+    <g
+       id="g91757"
+       transform="translate(0.09337305,-1.490623)">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect846-3-9"
+         width="57.741043"
+         height="6.0837841"
+         x="49.066093"
+         y="64.83651" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="62.166679"
+         y="69.054901"
+         id="text43239"><tspan
+           sodipodi:role="line"
+           id="tspan43237"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="62.166679"
+           y="69.054901">Conflict Detection</tspan></text>
+    </g>
+    <g
+       id="g94249">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect846-3-9-7"
+         width="57.741043"
+         height="6.0837841"
+         x="49.159466"
+         y="69.429672" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="69.083069"
+         y="73.648064"
+         id="text43239-9"><tspan
+           sodipodi:role="line"
+           id="tspan43237-2"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="69.083069"
+           y="73.648064">Scheduler</tspan></text>
+    </g>
+    <g
+       id="g75031">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.499999;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect53227"
+         width="10.155521"
+         height="20.193951"
+         x="41.817165"
+         y="78.846886" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="-95.113167"
+         y="48.210091"
+         id="text58467"
+         transform="rotate(-90)"><tspan
+           sodipodi:role="line"
+           id="tspan58465"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="-95.113167"
+           y="48.210091">Bank 0</tspan></text>
+    </g>
+    <g
+       id="g75036"
+       transform="translate(-0.28890355,0.10812378)">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.499999;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect53227-1"
+         width="10.155521"
+         height="20.193951"
+         x="55.210358"
+         y="78.738762" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="-95.005043"
+         y="61.603283"
+         id="text58467-2"
+         transform="rotate(-90)"><tspan
+           sodipodi:role="line"
+           id="tspan58465-7"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="-95.005043"
+           y="61.603283">Bank 1</tspan></text>
+    </g>
+    <g
+       id="g75041"
+       transform="translate(-0.42640324,0.12826538)">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.499999;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect53227-0"
+         width="10.155521"
+         height="20.193951"
+         x="68.452148"
+         y="78.71862" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="-94.984901"
+         y="74.845078"
+         id="text58467-9"
+         transform="rotate(-90)"><tspan
+           sodipodi:role="line"
+           id="tspan58465-3"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="-94.984901"
+           y="74.845078">Bank 2</tspan></text>
+    </g>
+    <g
+       id="g75046"
+       transform="translate(0,-0.36049652)">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.499999;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect53227-6"
+         width="10.155521"
+         height="20.193951"
+         x="81.130043"
+         y="79.207382" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="-95.473663"
+         y="87.522972"
+         id="text58467-0"
+         transform="rotate(-90)"><tspan
+           sodipodi:role="line"
+           id="tspan58465-6"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="-95.473663"
+           y="87.522972">Bank 3</tspan></text>
+    </g>
+    <g
+       id="g75106"
+       transform="translate(0,0.88236237)">
+      <rect
+         style="fill:none;stroke:#000000;stroke-width:0.499999;stroke-miterlimit:4;stroke-dasharray:none"
+         id="rect53227-2"
+         width="10.155521"
+         height="20.193951"
+         x="103.69362"
+         y="77.964523" />
+      <text
+         xml:space="preserve"
+         style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+         x="-95.307404"
+         y="110.08656"
+         id="text58467-6"
+         transform="rotate(-90)"><tspan
+           sodipodi:role="line"
+           id="tspan58465-1"
+           style="font-size:3.52778px;stroke-width:0.264583"
+           x="-95.307404"
+           y="110.08656">Bank 31</tspan></text>
+    </g>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:3.52778px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+       x="95.809242"
+       y="89.162628"
+       id="text80928"><tspan
+         sodipodi:role="line"
+         id="tspan80926"
+         style="font-size:3.52778px;stroke-width:0.264583"
+         x="95.809242"
+         y="89.162628">...</tspan></text>
+  </g>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/nosplit.png b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.png
new file mode 100644
index 0000000000..a8e5f01649
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/nosplit.svg b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.svg
new file mode 100644
index 0000000000..d0d9606be5
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/performance-model/nosplit.svg
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="23.89308mm"
+   height="24.000153mm"
+   viewBox="0 0 23.89308 24.000153"
+   version="1.1"
+   id="svg370"
+   inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
+   sodipodi:docname="nosplit.svg"
+   inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/nosplit.png"
+   inkscape:export-xdpi="180"
+   inkscape:export-ydpi="180"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview372"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:document-units="mm"
+     showgrid="false"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0"
+     inkscape:zoom="6.2217172"
+     inkscape:cx="-18.242552"
+     inkscape:cy="62.764023"
+     inkscape:window-width="2490"
+     inkscape:window-height="1376"
+     inkscape:window-x="70"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer1" />
+  <defs
+     id="defs367" />
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-30.085626,-76.587497)">
+    <path
+       style="fill:none;stroke:#000000;stroke-width:2.52;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 30.085626,88.555587 h 23.89308"
+       id="path69449-3-1"
+       inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/nosplit.png"
+       inkscape:export-xdpi="180"
+       inkscape:export-ydpi="180" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:2.52;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 34.808926,76.587497 v 10.78438"
+       id="path69449-3-1-6"
+       inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/nosplit.png"
+       inkscape:export-xdpi="180"
+       inkscape:export-ydpi="180" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:2.52;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="M 48.947196,89.803267 V 100.58765"
+       id="path69449-3-1-6-7"
+       inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/nosplit.png"
+       inkscape:export-xdpi="180"
+       inkscape:export-ydpi="180" />
+  </g>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/selayout.png b/projects/rocprofiler-compute/docs/data/performance-model/selayout.png
new file mode 100644
index 0000000000..73aa2b49de
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/selayout.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/split.png b/projects/rocprofiler-compute/docs/data/performance-model/split.png
new file mode 100644
index 0000000000..cca71eb2a4
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/split.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/split.svg b/projects/rocprofiler-compute/docs/data/performance-model/split.svg
new file mode 100644
index 0000000000..b033a9e111
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/performance-model/split.svg
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="23.89308mm"
+   height="23.893078mm"
+   viewBox="0 0 23.89308 23.893078"
+   version="1.1"
+   id="svg370"
+   inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
+   sodipodi:docname="split.svg"
+   inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/split.png"
+   inkscape:export-xdpi="180"
+   inkscape:export-ydpi="180"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview372"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:document-units="mm"
+     showgrid="false"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0"
+     inkscape:zoom="24.886869"
+     inkscape:cx="30.397556"
+     inkscape:cy="53.984292"
+     inkscape:window-width="2490"
+     inkscape:window-height="1376"
+     inkscape:window-x="70"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer1" />
+  <defs
+     id="defs367" />
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-30.085626,-76.637833)">
+    <path
+       style="fill:none;stroke:#000000;stroke-width:2.52;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 30.085626,88.584373 h 23.89308"
+       id="path69449-3-1"
+       inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/split.png"
+       inkscape:export-xdpi="180"
+       inkscape:export-ydpi="180" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:2.52;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="M 42.032166,76.637833 V 100.53091"
+       id="path69449-3-1-3"
+       inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/split.png"
+       inkscape:export-xdpi="180"
+       inkscape:export-ydpi="180" />
+  </g>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/uncached.png b/projects/rocprofiler-compute/docs/data/performance-model/uncached.png
new file mode 100644
index 0000000000..f770a1b291
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/performance-model/uncached.png differ
diff --git a/projects/rocprofiler-compute/docs/data/performance-model/uncached.svg b/projects/rocprofiler-compute/docs/data/performance-model/uncached.svg
new file mode 100644
index 0000000000..53affd4fc6
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/performance-model/uncached.svg
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="27.472563mm"
+   height="17.499945mm"
+   viewBox="0 0 27.472563 17.499945"
+   version="1.1"
+   id="svg370"
+   inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
+   sodipodi:docname="uncached.svg"
+   inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/uncached.png"
+   inkscape:export-xdpi="180"
+   inkscape:export-ydpi="180"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview372"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:document-units="mm"
+     showgrid="false"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0"
+     inkscape:zoom="6.2217172"
+     inkscape:cx="-18.242552"
+     inkscape:cy="72.086208"
+     inkscape:window-width="2490"
+     inkscape:window-height="1376"
+     inkscape:window-x="70"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer1" />
+  <defs
+     id="defs367">
+    <marker
+       style="overflow:visible"
+       id="TriangleOutS"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="TriangleOutS"
+       inkscape:isstock="true">
+      <path
+         transform="scale(0.2)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 5.77,0 -2.88,5 V -5 Z"
+         id="path1132" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="marker1262"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend"
+       inkscape:isstock="true">
+      <path
+         transform="scale(-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
+         id="path1260" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend"
+       inkscape:isstock="true">
+      <path
+         transform="scale(-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
+         id="path1011" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Lend"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Lend"
+       inkscape:isstock="true">
+      <path
+         transform="matrix(-0.8,0,0,-0.8,-10,0)"
+         style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path987" />
+    </marker>
+  </defs>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-30.085626,-74.157956)">
+    <path
+       style="fill:none;stroke:#000000;stroke-width:2.52;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.52, 2.52;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutS)"
+       d="m 30.085626,88.555587 h 23.89308"
+       id="path69449-3-1"
+       inkscape:export-filename="/home/nick/Documents/software_repos/omniperf/src/docs/images/nosplit.png"
+       inkscape:export-xdpi="180"
+       inkscape:export-ydpi="180" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
+       x="37.700855"
+       y="82.012749"
+       id="text7915"><tspan
+         sodipodi:role="line"
+         id="tspan7913"
+         style="stroke-width:0.264583"
+         x="37.700855"
+         y="82.012749">x2</tspan></text>
+  </g>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/profile/sample-roof-plot.png b/projects/rocprofiler-compute/docs/data/profile/sample-roof-plot.png
new file mode 100644
index 0000000000..2deaba7ad2
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profile/sample-roof-plot.png differ
diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.png b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.png
new file mode 100644
index 0000000000..bd74d62499
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.png differ
diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.svg b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.svg
new file mode 100644
index 0000000000..a854f697de
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsbandwidth.svg
@@ -0,0 +1,1579 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns:xlink="http://www.w3.org/1999/xlink" width="460.8pt" height="345.6pt" viewBox="0 0 460.8 345.6" xmlns="http://www.w3.org/2000/svg" version="1.1">
+ <metadata>
+  <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+   <cc:Work>
+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+    <dc:date>2023-08-21T11:00:20.650499</dc:date>
+    <dc:format>image/svg+xml</dc:format>
+    <dc:creator>
+     <cc:Agent>
+      <dc:title>Matplotlib v3.7.1, https://matplotlib.org/</dc:title>
+     </cc:Agent>
+    </dc:creator>
+   </cc:Work>
+  </rdf:RDF>
+ </metadata>
+ <defs>
+  <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="M 0 345.6 
+L 460.8 345.6 
+L 460.8 0 
+L 0 0 
+z
+" style="fill: #ffffff"/>
+  </g>
+  <g id="axes_1">
+   <g id="patch_2">
+    <path d="M 66.08 299.32 
+L 450 299.32 
+L 450 10.8 
+L 66.08 10.8 
+z
+" style="fill: #ffffff"/>
+   </g>
+   <g id="matplotlib.axis_1">
+    <g id="xtick_1">
+     <g id="line2d_1">
+      <defs>
+       <path id="ma9b59badde" d="M 0 0 
+L 0 3.5 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#ma9b59badde" x="83.530909" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_1">
+      <!-- 1 -->
+      <g transform="translate(79.713409 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-31" d="M 794 531 
+L 1825 531 
+L 1825 4091 
+L 703 3866 
+L 703 4441 
+L 1819 4666 
+L 2450 4666 
+L 2450 531 
+L 3481 531 
+L 3481 0 
+L 794 0 
+L 794 531 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_2">
+     <g id="line2d_2">
+      <g>
+       <use xlink:href="#ma9b59badde" x="169.75893" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_2">
+      <!-- 64 -->
+      <g transform="translate(162.12393 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-36" d="M 2113 2584 
+Q 1688 2584 1439 2293 
+Q 1191 2003 1191 1497 
+Q 1191 994 1439 701 
+Q 1688 409 2113 409 
+Q 2538 409 2786 701 
+Q 3034 994 3034 1497 
+Q 3034 2003 2786 2293 
+Q 2538 2584 2113 2584 
+z
+M 3366 4563 
+L 3366 3988 
+Q 3128 4100 2886 4159 
+Q 2644 4219 2406 4219 
+Q 1781 4219 1451 3797 
+Q 1122 3375 1075 2522 
+Q 1259 2794 1537 2939 
+Q 1816 3084 2150 3084 
+Q 2853 3084 3261 2657 
+Q 3669 2231 3669 1497 
+Q 3669 778 3244 343 
+Q 2819 -91 2113 -91 
+Q 1303 -91 875 529 
+Q 447 1150 447 2328 
+Q 447 3434 972 4092 
+Q 1497 4750 2381 4750 
+Q 2619 4750 2861 4703 
+Q 3103 4656 3366 4563 
+z
+" transform="scale(0.015625)"/>
+        <path id="DejaVuSans-34" d="M 2419 4116 
+L 825 1625 
+L 2419 1625 
+L 2419 4116 
+z
+M 2253 4666 
+L 3047 4666 
+L 3047 1625 
+L 3713 1625 
+L 3713 1100 
+L 3047 1100 
+L 3047 0 
+L 2419 0 
+L 2419 1100 
+L 313 1100 
+L 313 1709 
+L 2253 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-36"/>
+       <use xlink:href="#DejaVuSans-34" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_3">
+     <g id="line2d_3">
+      <g>
+       <use xlink:href="#ma9b59badde" x="257.355651" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_3">
+      <!-- 128 -->
+      <g transform="translate(245.903151 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-32" d="M 1228 531 
+L 3431 531 
+L 3431 0 
+L 469 0 
+L 469 531 
+Q 828 903 1448 1529 
+Q 2069 2156 2228 2338 
+Q 2531 2678 2651 2914 
+Q 2772 3150 2772 3378 
+Q 2772 3750 2511 3984 
+Q 2250 4219 1831 4219 
+Q 1534 4219 1204 4116 
+Q 875 4013 500 3803 
+L 500 4441 
+Q 881 4594 1212 4672 
+Q 1544 4750 1819 4750 
+Q 2544 4750 2975 4387 
+Q 3406 4025 3406 3419 
+Q 3406 3131 3298 2873 
+Q 3191 2616 2906 2266 
+Q 2828 2175 2409 1742 
+Q 1991 1309 1228 531 
+z
+" transform="scale(0.015625)"/>
+        <path id="DejaVuSans-38" d="M 2034 2216 
+Q 1584 2216 1326 1975 
+Q 1069 1734 1069 1313 
+Q 1069 891 1326 650 
+Q 1584 409 2034 409 
+Q 2484 409 2743 651 
+Q 3003 894 3003 1313 
+Q 3003 1734 2745 1975 
+Q 2488 2216 2034 2216 
+z
+M 1403 2484 
+Q 997 2584 770 2862 
+Q 544 3141 544 3541 
+Q 544 4100 942 4425 
+Q 1341 4750 2034 4750 
+Q 2731 4750 3128 4425 
+Q 3525 4100 3525 3541 
+Q 3525 3141 3298 2862 
+Q 3072 2584 2669 2484 
+Q 3125 2378 3379 2068 
+Q 3634 1759 3634 1313 
+Q 3634 634 3220 271 
+Q 2806 -91 2034 -91 
+Q 1263 -91 848 271 
+Q 434 634 434 1313 
+Q 434 1759 690 2068 
+Q 947 2378 1403 2484 
+z
+M 1172 3481 
+Q 1172 3119 1398 2916 
+Q 1625 2713 2034 2713 
+Q 2441 2713 2670 2916 
+Q 2900 3119 2900 3481 
+Q 2900 3844 2670 4047 
+Q 2441 4250 2034 4250 
+Q 1625 4250 1398 4047 
+Q 1172 3844 1172 3481 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-32" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-38" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_4">
+     <g id="line2d_4">
+      <g>
+       <use xlink:href="#ma9b59badde" x="344.952371" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_4">
+      <!-- 192 -->
+      <g transform="translate(333.499871 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-39" d="M 703 97 
+L 703 672 
+Q 941 559 1184 500 
+Q 1428 441 1663 441 
+Q 2288 441 2617 861 
+Q 2947 1281 2994 2138 
+Q 2813 1869 2534 1725 
+Q 2256 1581 1919 1581 
+Q 1219 1581 811 2004 
+Q 403 2428 403 3163 
+Q 403 3881 828 4315 
+Q 1253 4750 1959 4750 
+Q 2769 4750 3195 4129 
+Q 3622 3509 3622 2328 
+Q 3622 1225 3098 567 
+Q 2575 -91 1691 -91 
+Q 1453 -91 1209 -44 
+Q 966 3 703 97 
+z
+M 1959 2075 
+Q 2384 2075 2632 2365 
+Q 2881 2656 2881 3163 
+Q 2881 3666 2632 3958 
+Q 2384 4250 1959 4250 
+Q 1534 4250 1286 3958 
+Q 1038 3666 1038 3163 
+Q 1038 2656 1286 2365 
+Q 1534 2075 1959 2075 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-39" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-32" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_5">
+     <g id="line2d_5">
+      <g>
+       <use xlink:href="#ma9b59badde" x="432.549091" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_5">
+      <!-- 256 -->
+      <g transform="translate(421.096591 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-35" d="M 691 4666 
+L 3169 4666 
+L 3169 4134 
+L 1269 4134 
+L 1269 2991 
+Q 1406 3038 1543 3061 
+Q 1681 3084 1819 3084 
+Q 2600 3084 3056 2656 
+Q 3513 2228 3513 1497 
+Q 3513 744 3044 326 
+Q 2575 -91 1722 -91 
+Q 1428 -91 1123 -41 
+Q 819 9 494 109 
+L 494 744 
+Q 775 591 1075 516 
+Q 1375 441 1709 441 
+Q 2250 441 2565 725 
+Q 2881 1009 2881 1497 
+Q 2881 1984 2565 2268 
+Q 2250 2553 1709 2553 
+Q 1456 2553 1204 2497 
+Q 953 2441 691 2322 
+L 691 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-35" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-36" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_6">
+     <!-- Kernel Dispatch [N] -->
+     <g transform="translate(188.216875 332.8755) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-4b" d="M 628 4666 
+L 1259 4666 
+L 1259 2694 
+L 3353 4666 
+L 4166 4666 
+L 1850 2491 
+L 4331 0 
+L 3500 0 
+L 1259 2247 
+L 1259 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-65" d="M 3597 1894 
+L 3597 1613 
+L 953 1613 
+Q 991 1019 1311 708 
+Q 1631 397 2203 397 
+Q 2534 397 2845 478 
+Q 3156 559 3463 722 
+L 3463 178 
+Q 3153 47 2828 -22 
+Q 2503 -91 2169 -91 
+Q 1331 -91 842 396 
+Q 353 884 353 1716 
+Q 353 2575 817 3079 
+Q 1281 3584 2069 3584 
+Q 2775 3584 3186 3129 
+Q 3597 2675 3597 1894 
+z
+M 3022 2063 
+Q 3016 2534 2758 2815 
+Q 2500 3097 2075 3097 
+Q 1594 3097 1305 2825 
+Q 1016 2553 972 2059 
+L 3022 2063 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-72" d="M 2631 2963 
+Q 2534 3019 2420 3045 
+Q 2306 3072 2169 3072 
+Q 1681 3072 1420 2755 
+Q 1159 2438 1159 1844 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1341 3275 1631 3429 
+Q 1922 3584 2338 3584 
+Q 2397 3584 2469 3576 
+Q 2541 3569 2628 3553 
+L 2631 2963 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6e" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6c" d="M 603 4863 
+L 1178 4863 
+L 1178 0 
+L 603 0 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-20" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-44" d="M 1259 4147 
+L 1259 519 
+L 2022 519 
+Q 2988 519 3436 956 
+Q 3884 1394 3884 2338 
+Q 3884 3275 3436 3711 
+Q 2988 4147 2022 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 1925 4666 
+Q 3281 4666 3915 4102 
+Q 4550 3538 4550 2338 
+Q 4550 1131 3912 565 
+Q 3275 0 1925 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-69" d="M 603 3500 
+L 1178 3500 
+L 1178 0 
+L 603 0 
+L 603 3500 
+z
+M 603 4863 
+L 1178 4863 
+L 1178 4134 
+L 603 4134 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-73" d="M 2834 3397 
+L 2834 2853 
+Q 2591 2978 2328 3040 
+Q 2066 3103 1784 3103 
+Q 1356 3103 1142 2972 
+Q 928 2841 928 2578 
+Q 928 2378 1081 2264 
+Q 1234 2150 1697 2047 
+L 1894 2003 
+Q 2506 1872 2764 1633 
+Q 3022 1394 3022 966 
+Q 3022 478 2636 193 
+Q 2250 -91 1575 -91 
+Q 1294 -91 989 -36 
+Q 684 19 347 128 
+L 347 722 
+Q 666 556 975 473 
+Q 1284 391 1588 391 
+Q 1994 391 2212 530 
+Q 2431 669 2431 922 
+Q 2431 1156 2273 1281 
+Q 2116 1406 1581 1522 
+L 1381 1569 
+Q 847 1681 609 1914 
+Q 372 2147 372 2553 
+Q 372 3047 722 3315 
+Q 1072 3584 1716 3584 
+Q 2034 3584 2315 3537 
+Q 2597 3491 2834 3397 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-70" d="M 1159 525 
+L 1159 -1331 
+L 581 -1331 
+L 581 3500 
+L 1159 3500 
+L 1159 2969 
+Q 1341 3281 1617 3432 
+Q 1894 3584 2278 3584 
+Q 2916 3584 3314 3078 
+Q 3713 2572 3713 1747 
+Q 3713 922 3314 415 
+Q 2916 -91 2278 -91 
+Q 1894 -91 1617 61 
+Q 1341 213 1159 525 
+z
+M 3116 1747 
+Q 3116 2381 2855 2742 
+Q 2594 3103 2138 3103 
+Q 1681 3103 1420 2742 
+Q 1159 2381 1159 1747 
+Q 1159 1113 1420 752 
+Q 1681 391 2138 391 
+Q 2594 391 2855 752 
+Q 3116 1113 3116 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-61" d="M 2194 1759 
+Q 1497 1759 1228 1600 
+Q 959 1441 959 1056 
+Q 959 750 1161 570 
+Q 1363 391 1709 391 
+Q 2188 391 2477 730 
+Q 2766 1069 2766 1631 
+L 2766 1759 
+L 2194 1759 
+z
+M 3341 1997 
+L 3341 0 
+L 2766 0 
+L 2766 531 
+Q 2569 213 2275 61 
+Q 1981 -91 1556 -91 
+Q 1019 -91 701 211 
+Q 384 513 384 1019 
+Q 384 1609 779 1909 
+Q 1175 2209 1959 2209 
+L 2766 2209 
+L 2766 2266 
+Q 2766 2663 2505 2880 
+Q 2244 3097 1772 3097 
+Q 1472 3097 1187 3025 
+Q 903 2953 641 2809 
+L 641 3341 
+Q 956 3463 1253 3523 
+Q 1550 3584 1831 3584 
+Q 2591 3584 2966 3190 
+Q 3341 2797 3341 1997 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-74" d="M 1172 4494 
+L 1172 3500 
+L 2356 3500 
+L 2356 3053 
+L 1172 3053 
+L 1172 1153 
+Q 1172 725 1289 603 
+Q 1406 481 1766 481 
+L 2356 481 
+L 2356 0 
+L 1766 0 
+Q 1100 0 847 248 
+Q 594 497 594 1153 
+L 594 3053 
+L 172 3053 
+L 172 3500 
+L 594 3500 
+L 594 4494 
+L 1172 4494 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-63" d="M 3122 3366 
+L 3122 2828 
+Q 2878 2963 2633 3030 
+Q 2388 3097 2138 3097 
+Q 1578 3097 1268 2742 
+Q 959 2388 959 1747 
+Q 959 1106 1268 751 
+Q 1578 397 2138 397 
+Q 2388 397 2633 464 
+Q 2878 531 3122 666 
+L 3122 134 
+Q 2881 22 2623 -34 
+Q 2366 -91 2075 -91 
+Q 1284 -91 818 406 
+Q 353 903 353 1747 
+Q 353 2603 823 3093 
+Q 1294 3584 2113 3584 
+Q 2378 3584 2631 3529 
+Q 2884 3475 3122 3366 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-68" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 4863 
+L 1159 4863 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-5b" d="M 550 4863 
+L 1875 4863 
+L 1875 4416 
+L 1125 4416 
+L 1125 -397 
+L 1875 -397 
+L 1875 -844 
+L 550 -844 
+L 550 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-4e" d="M 628 4666 
+L 1478 4666 
+L 3547 763 
+L 3547 4666 
+L 4159 4666 
+L 4159 0 
+L 3309 0 
+L 1241 3903 
+L 1241 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-5d" d="M 1947 4863 
+L 1947 -844 
+L 622 -844 
+L 622 -397 
+L 1369 -397 
+L 1369 4416 
+L 622 4416 
+L 622 4863 
+L 1947 4863 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-4b"/>
+      <use xlink:href="#DejaVuSans-65" x="60.576172"/>
+      <use xlink:href="#DejaVuSans-72" x="122.099609"/>
+      <use xlink:href="#DejaVuSans-6e" x="161.462891"/>
+      <use xlink:href="#DejaVuSans-65" x="224.841797"/>
+      <use xlink:href="#DejaVuSans-6c" x="286.365234"/>
+      <use xlink:href="#DejaVuSans-20" x="314.148438"/>
+      <use xlink:href="#DejaVuSans-44" x="345.935547"/>
+      <use xlink:href="#DejaVuSans-69" x="422.9375"/>
+      <use xlink:href="#DejaVuSans-73" x="450.720703"/>
+      <use xlink:href="#DejaVuSans-70" x="502.820312"/>
+      <use xlink:href="#DejaVuSans-61" x="566.296875"/>
+      <use xlink:href="#DejaVuSans-74" x="627.576172"/>
+      <use xlink:href="#DejaVuSans-63" x="666.785156"/>
+      <use xlink:href="#DejaVuSans-68" x="721.765625"/>
+      <use xlink:href="#DejaVuSans-20" x="785.144531"/>
+      <use xlink:href="#DejaVuSans-5b" x="816.931641"/>
+      <use xlink:href="#DejaVuSans-4e" x="855.945312"/>
+      <use xlink:href="#DejaVuSans-5d" x="930.75"/>
+     </g>
+    </g>
+   </g>
+   <g id="matplotlib.axis_2">
+    <g id="ytick_1">
+     <g id="line2d_6">
+      <defs>
+       <path id="m1fa1725574" d="M 0 0 
+L -3.5 0 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#m1fa1725574" x="66.08" y="287.234046" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_7">
+      <!-- 0 -->
+      <g transform="translate(51.445 291.793109) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-30" d="M 2034 4250 
+Q 1547 4250 1301 3770 
+Q 1056 3291 1056 2328 
+Q 1056 1369 1301 889 
+Q 1547 409 2034 409 
+Q 2525 409 2770 889 
+Q 3016 1369 3016 2328 
+Q 3016 3291 2770 3770 
+Q 2525 4250 2034 4250 
+z
+M 2034 4750 
+Q 2819 4750 3233 4129 
+Q 3647 3509 3647 2328 
+Q 3647 1150 3233 529 
+Q 2819 -91 2034 -91 
+Q 1250 -91 836 529 
+Q 422 1150 422 2328 
+Q 422 3509 836 4129 
+Q 1250 4750 2034 4750 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-30"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_2">
+     <g id="line2d_7">
+      <g>
+       <use xlink:href="#m1fa1725574" x="66.08" y="235.804456" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_8">
+      <!-- 200 -->
+      <g transform="translate(36.175 240.363519) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_3">
+     <g id="line2d_8">
+      <g>
+       <use xlink:href="#m1fa1725574" x="66.08" y="184.374866" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_9">
+      <!-- 400 -->
+      <g transform="translate(36.175 188.933929) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-34"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_4">
+     <g id="line2d_9">
+      <g>
+       <use xlink:href="#m1fa1725574" x="66.08" y="132.945276" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_10">
+      <!-- 600 -->
+      <g transform="translate(36.175 137.504339) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-36"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_5">
+     <g id="line2d_10">
+      <g>
+       <use xlink:href="#m1fa1725574" x="66.08" y="81.515686" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_11">
+      <!-- 800 -->
+      <g transform="translate(36.175 86.074749) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-38"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_6">
+     <g id="line2d_11">
+      <g>
+       <use xlink:href="#m1fa1725574" x="66.08" y="30.086096" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_12">
+      <!-- 1000 -->
+      <g transform="translate(28.54 34.645159) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+       <use xlink:href="#DejaVuSans-30" x="190.869141"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_13">
+     <!-- Bandwidth [B] -->
+     <g transform="translate(21.54525 206.237375) rotate(-90) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-42" d="M 1259 2228 
+L 1259 519 
+L 2272 519 
+Q 2781 519 3026 730 
+Q 3272 941 3272 1375 
+Q 3272 1813 3026 2020 
+Q 2781 2228 2272 2228 
+L 1259 2228 
+z
+M 1259 4147 
+L 1259 2741 
+L 2194 2741 
+Q 2656 2741 2882 2914 
+Q 3109 3088 3109 3444 
+Q 3109 3797 2882 3972 
+Q 2656 4147 2194 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 2241 4666 
+Q 2963 4666 3353 4366 
+Q 3744 4066 3744 3513 
+Q 3744 3084 3544 2831 
+Q 3344 2578 2956 2516 
+Q 3422 2416 3680 2098 
+Q 3938 1781 3938 1306 
+Q 3938 681 3513 340 
+Q 3088 0 2303 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-64" d="M 2906 2969 
+L 2906 4863 
+L 3481 4863 
+L 3481 0 
+L 2906 0 
+L 2906 525 
+Q 2725 213 2448 61 
+Q 2172 -91 1784 -91 
+Q 1150 -91 751 415 
+Q 353 922 353 1747 
+Q 353 2572 751 3078 
+Q 1150 3584 1784 3584 
+Q 2172 3584 2448 3432 
+Q 2725 3281 2906 2969 
+z
+M 947 1747 
+Q 947 1113 1208 752 
+Q 1469 391 1925 391 
+Q 2381 391 2643 752 
+Q 2906 1113 2906 1747 
+Q 2906 2381 2643 2742 
+Q 2381 3103 1925 3103 
+Q 1469 3103 1208 2742 
+Q 947 2381 947 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-77" d="M 269 3500 
+L 844 3500 
+L 1563 769 
+L 2278 3500 
+L 2956 3500 
+L 3675 769 
+L 4391 3500 
+L 4966 3500 
+L 4050 0 
+L 3372 0 
+L 2619 2869 
+L 1863 0 
+L 1184 0 
+L 269 3500 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-42"/>
+      <use xlink:href="#DejaVuSans-61" x="68.603516"/>
+      <use xlink:href="#DejaVuSans-6e" x="129.882812"/>
+      <use xlink:href="#DejaVuSans-64" x="193.261719"/>
+      <use xlink:href="#DejaVuSans-77" x="256.738281"/>
+      <use xlink:href="#DejaVuSans-69" x="338.525391"/>
+      <use xlink:href="#DejaVuSans-64" x="366.308594"/>
+      <use xlink:href="#DejaVuSans-74" x="429.785156"/>
+      <use xlink:href="#DejaVuSans-68" x="468.994141"/>
+      <use xlink:href="#DejaVuSans-20" x="532.373047"/>
+      <use xlink:href="#DejaVuSans-5b" x="564.160156"/>
+      <use xlink:href="#DejaVuSans-42" x="603.173828"/>
+      <use xlink:href="#DejaVuSans-5d" x="671.777344"/>
+     </g>
+    </g>
+   </g>
+   <g id="line2d_12">
+    <defs>
+     <path id="m6676b56e61" d="M 0 2 
+C 0.530406 2 1.03916 1.789267 1.414214 1.414214 
+C 1.789267 1.03916 2 0.530406 2 0 
+C 2 -0.530406 1.789267 -1.03916 1.414214 -1.414214 
+C 1.03916 -1.789267 0.530406 -2 0 -2 
+C -0.530406 -2 -1.03916 -1.789267 -1.414214 -1.414214 
+C -1.789267 -1.03916 -2 -0.530406 -2 0 
+C -2 0.530406 -1.789267 1.03916 -1.414214 1.414214 
+C -1.03916 1.789267 -0.530406 2 0 2 
+z
+" style="stroke: #440154"/>
+    </defs>
+    <g clip-path="url(#p8aa07be5e1)">
+     <use xlink:href="#m6676b56e61" x="83.530909" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="84.899608" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="86.268307" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="87.637005" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="89.005704" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="90.374403" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="91.743102" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="93.1118" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="94.480499" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="95.849198" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="97.217897" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="98.586595" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="99.955294" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="101.323993" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="102.692692" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="104.06139" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="105.430089" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="106.798788" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="108.167487" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="109.536185" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="110.904884" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="112.273583" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="113.642282" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="115.01098" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="116.379679" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="117.748378" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="119.117077" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="120.485775" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="121.854474" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="123.223173" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="124.591872" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="125.96057" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="127.329269" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="128.697968" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="130.066667" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="131.435365" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="132.804064" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="134.172763" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="135.541462" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="136.91016" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="138.278859" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="139.647558" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="141.016257" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="142.384955" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="143.753654" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="145.122353" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="146.491052" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="147.85975" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="149.228449" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="150.597148" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="151.965847" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="153.334545" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="154.703244" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="156.071943" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="157.440642" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="158.80934" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="160.178039" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="161.546738" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="162.915437" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="164.284135" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="165.652834" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="167.021533" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="168.390232" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="169.75893" y="221.404171" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="171.127629" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="172.496328" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="173.865027" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="175.233725" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="176.602424" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="177.971123" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="179.339822" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="180.70852" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="182.077219" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="183.445918" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="184.814617" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="186.183316" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="187.552014" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="188.920713" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="190.289412" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="191.658111" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="193.026809" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="194.395508" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="195.764207" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="197.132906" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="198.501604" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="199.870303" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="201.239002" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="202.607701" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="203.976399" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="205.345098" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="206.713797" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="208.082496" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="209.451194" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="210.819893" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="212.188592" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="213.557291" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="214.925989" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="216.294688" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="217.663387" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="219.032086" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="220.400784" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="221.769483" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="223.138182" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="224.506881" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="225.875579" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="227.244278" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="228.612977" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="229.981676" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="231.350374" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="232.719073" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="234.087772" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="235.456471" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="236.825169" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="238.193868" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="239.562567" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="240.931266" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="242.299964" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="243.668663" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="245.037362" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="246.406061" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="247.774759" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="249.143458" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="250.512157" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="251.880856" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="253.249554" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="254.618253" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="255.986952" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="257.355651" y="155.574296" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="258.724349" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="260.093048" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="261.461747" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="262.830446" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="264.199144" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="265.567843" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="266.936542" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="268.305241" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="269.673939" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="271.042638" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="272.411337" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="273.780036" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="275.148734" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="276.517433" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="277.886132" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="279.254831" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="280.623529" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="281.992228" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="283.360927" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="284.729626" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="286.098324" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="287.467023" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="288.835722" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="290.204421" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="291.573119" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="292.941818" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="294.310517" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="295.679216" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="297.047914" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="298.416613" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="299.785312" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="301.154011" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="302.522709" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="303.891408" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="305.260107" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="306.628806" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="307.997504" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="309.366203" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="310.734902" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="312.103601" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="313.472299" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="314.840998" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="316.209697" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="317.578396" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="318.947094" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="320.315793" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="321.684492" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="323.053191" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="324.421889" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="325.790588" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="327.159287" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="328.527986" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="329.896684" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="331.265383" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="332.634082" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="334.002781" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="335.37148" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="336.740178" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="338.108877" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="339.477576" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="340.846275" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="342.214973" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="343.583672" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="344.952371" y="89.744421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="346.32107" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="347.689768" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="349.058467" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="350.427166" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="351.795865" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="353.164563" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="354.533262" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="355.901961" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="357.27066" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="358.639358" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="360.008057" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="361.376756" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="362.745455" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="364.114153" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="365.482852" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="366.851551" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="368.22025" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="369.588948" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="370.957647" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="372.326346" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="373.695045" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="375.063743" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="376.432442" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="377.801141" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="379.16984" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="380.538538" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="381.907237" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="383.275936" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="384.644635" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="386.013333" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="387.382032" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="388.750731" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="390.11943" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="391.488128" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="392.856827" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="394.225526" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="395.594225" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="396.962923" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="398.331622" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="399.700321" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="401.06902" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="402.437718" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="403.806417" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="405.175116" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="406.543815" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="407.912513" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="409.281212" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="410.649911" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="412.01861" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="413.387308" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="414.756007" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="416.124706" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="417.493405" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="418.862103" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="420.230802" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="421.599501" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="422.9682" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="424.336898" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="425.705597" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="427.074296" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="428.442995" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="429.811693" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="431.180392" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#m6676b56e61" x="432.549091" y="23.914545" style="fill-opacity: 0; stroke: #440154"/>
+    </g>
+   </g>
+   <g id="line2d_13">
+    <defs>
+     <path id="mb17681182d" d="M -5.5 -0 
+L 5.5 5.5 
+L 5.5 -5.5 
+z
+" style="stroke: #21918c; stroke-linejoin: miter"/>
+    </defs>
+    <g clip-path="url(#p8aa07be5e1)">
+     <use xlink:href="#mb17681182d" x="83.530909" y="286.205455" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="84.899608" y="285.176863" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="86.268307" y="284.148271" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="87.637005" y="283.119679" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="89.005704" y="282.091087" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="90.374403" y="281.062496" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="91.743102" y="280.033904" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="93.1118" y="279.005312" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="94.480499" y="277.97672" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="95.849198" y="276.948128" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="97.217897" y="275.919537" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="98.586595" y="274.890945" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="99.955294" y="273.862353" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="101.323993" y="272.833761" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="102.692692" y="271.805169" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="104.06139" y="270.776578" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="105.430089" y="269.747986" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="106.798788" y="268.719394" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="108.167487" y="267.690802" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="109.536185" y="266.66221" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="110.904884" y="265.633619" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="112.273583" y="264.605027" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="113.642282" y="263.576435" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="115.01098" y="262.547843" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="116.379679" y="261.519251" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="117.748378" y="260.49066" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="119.117077" y="259.462068" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="120.485775" y="258.433476" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="121.854474" y="257.404884" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="123.223173" y="256.376292" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="124.591872" y="255.347701" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="125.96057" y="254.319109" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="127.329269" y="253.290517" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="128.697968" y="252.261925" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="130.066667" y="251.233333" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="131.435365" y="250.204742" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="132.804064" y="249.17615" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="134.172763" y="248.147558" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="135.541462" y="247.118966" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="136.91016" y="246.090374" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="138.278859" y="245.061783" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="139.647558" y="244.033191" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="141.016257" y="243.004599" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="142.384955" y="241.976007" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="143.753654" y="240.947415" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="145.122353" y="239.918824" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="146.491052" y="238.890232" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="147.85975" y="237.86164" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="149.228449" y="236.833048" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="150.597148" y="235.804456" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="151.965847" y="234.775865" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="153.334545" y="233.747273" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="154.703244" y="232.718681" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="156.071943" y="231.690089" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="157.440642" y="230.661497" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="158.80934" y="229.632906" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="160.178039" y="228.604314" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="161.546738" y="227.575722" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="162.915437" y="226.54713" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="164.284135" y="225.518538" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="165.652834" y="224.489947" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="167.021533" y="223.461355" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="168.390232" y="222.432763" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="169.75893" y="221.404171" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="171.127629" y="220.375579" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="172.496328" y="219.346988" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="173.865027" y="218.318396" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="175.233725" y="217.289804" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="176.602424" y="216.261212" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="177.971123" y="215.23262" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="179.339822" y="214.204029" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="180.70852" y="213.175437" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="182.077219" y="212.146845" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="183.445918" y="211.118253" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="184.814617" y="210.089661" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="186.183316" y="209.06107" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="187.552014" y="208.032478" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="188.920713" y="207.003886" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="190.289412" y="205.975294" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="191.658111" y="204.946702" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="193.026809" y="203.918111" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="194.395508" y="202.889519" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="195.764207" y="201.860927" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="197.132906" y="200.832335" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="198.501604" y="199.803743" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="199.870303" y="198.775152" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="201.239002" y="197.74656" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="202.607701" y="196.717968" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="203.976399" y="195.689376" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="205.345098" y="194.660784" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="206.713797" y="193.632193" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="208.082496" y="192.603601" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="209.451194" y="191.575009" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="210.819893" y="190.546417" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="212.188592" y="189.517825" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="213.557291" y="188.489234" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="214.925989" y="187.460642" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="216.294688" y="186.43205" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="217.663387" y="185.403458" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="219.032086" y="184.374866" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="220.400784" y="183.346275" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="221.769483" y="182.317683" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="223.138182" y="181.289091" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="224.506881" y="180.260499" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="225.875579" y="179.231907" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="227.244278" y="178.203316" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="228.612977" y="177.174724" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="229.981676" y="176.146132" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="231.350374" y="175.11754" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="232.719073" y="174.088948" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="234.087772" y="173.060357" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="235.456471" y="172.031765" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="236.825169" y="171.003173" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="238.193868" y="169.974581" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="239.562567" y="168.945989" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="240.931266" y="167.917398" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="242.299964" y="166.888806" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="243.668663" y="165.860214" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="245.037362" y="164.831622" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="246.406061" y="163.80303" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="247.774759" y="162.774439" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="249.143458" y="161.745847" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="250.512157" y="160.717255" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="251.880856" y="159.688663" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="253.249554" y="158.660071" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="254.618253" y="157.63148" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="255.986952" y="156.602888" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="257.355651" y="155.574296" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="258.724349" y="154.545704" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="260.093048" y="153.517112" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="261.461747" y="152.48852" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="262.830446" y="151.459929" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="264.199144" y="150.431337" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="265.567843" y="149.402745" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="266.936542" y="148.374153" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="268.305241" y="147.345561" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="269.673939" y="146.31697" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="271.042638" y="145.288378" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="272.411337" y="144.259786" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="273.780036" y="143.231194" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="275.148734" y="142.202602" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="276.517433" y="141.174011" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="277.886132" y="140.145419" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="279.254831" y="139.116827" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="280.623529" y="138.088235" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="281.992228" y="137.059643" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="283.360927" y="136.031052" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="284.729626" y="135.00246" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="286.098324" y="133.973868" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="287.467023" y="132.945276" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="288.835722" y="131.916684" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="290.204421" y="130.888093" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="291.573119" y="129.859501" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="292.941818" y="128.830909" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="294.310517" y="127.802317" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="295.679216" y="126.773725" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="297.047914" y="125.745134" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="298.416613" y="124.716542" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="299.785312" y="123.68795" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="301.154011" y="122.659358" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="302.522709" y="121.630766" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="303.891408" y="120.602175" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="305.260107" y="119.573583" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="306.628806" y="118.544991" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="307.997504" y="117.516399" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="309.366203" y="116.487807" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="310.734902" y="115.459216" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="312.103601" y="114.430624" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="313.472299" y="113.402032" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="314.840998" y="112.37344" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="316.209697" y="111.344848" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="317.578396" y="110.316257" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="318.947094" y="109.287665" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="320.315793" y="108.259073" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="321.684492" y="107.230481" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="323.053191" y="106.201889" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="324.421889" y="105.173298" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="325.790588" y="104.144706" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="327.159287" y="103.116114" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="328.527986" y="102.087522" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="329.896684" y="101.05893" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="331.265383" y="100.030339" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="332.634082" y="99.001747" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="334.002781" y="97.973155" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="335.37148" y="96.944563" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="336.740178" y="95.915971" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="338.108877" y="94.88738" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="339.477576" y="93.858788" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="340.846275" y="92.830196" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="342.214973" y="91.801604" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="343.583672" y="90.773012" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="344.952371" y="89.744421" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="346.32107" y="88.715829" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="347.689768" y="87.687237" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="349.058467" y="86.658645" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="350.427166" y="85.630053" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="351.795865" y="84.601462" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="353.164563" y="83.57287" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="354.533262" y="82.544278" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="355.901961" y="81.515686" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="357.27066" y="80.487094" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="358.639358" y="79.458503" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="360.008057" y="78.429911" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="361.376756" y="77.401319" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="362.745455" y="76.372727" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="364.114153" y="75.344135" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="365.482852" y="74.315544" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="366.851551" y="73.286952" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="368.22025" y="72.25836" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="369.588948" y="71.229768" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="370.957647" y="70.201176" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="372.326346" y="69.172585" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="373.695045" y="68.143993" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="375.063743" y="67.115401" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="376.432442" y="66.086809" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="377.801141" y="65.058217" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="379.16984" y="64.029626" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="380.538538" y="63.001034" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="381.907237" y="61.972442" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="383.275936" y="60.94385" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="384.644635" y="59.915258" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="386.013333" y="58.886667" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="387.382032" y="57.858075" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="388.750731" y="56.829483" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="390.11943" y="55.800891" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="391.488128" y="54.772299" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="392.856827" y="53.743708" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="394.225526" y="52.715116" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="395.594225" y="51.686524" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="396.962923" y="50.657932" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="398.331622" y="49.62934" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="399.700321" y="48.600749" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="401.06902" y="47.572157" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="402.437718" y="46.543565" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="403.806417" y="45.514973" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="405.175116" y="44.486381" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="406.543815" y="43.45779" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="407.912513" y="42.429198" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="409.281212" y="41.400606" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="410.649911" y="40.372014" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="412.01861" y="39.343422" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="413.387308" y="38.314831" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="414.756007" y="37.286239" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="416.124706" y="36.257647" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="417.493405" y="35.229055" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="418.862103" y="34.200463" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="420.230802" y="33.171872" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="421.599501" y="32.14328" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="422.9682" y="31.114688" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="424.336898" y="30.086096" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="425.705597" y="29.057504" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="427.074296" y="28.028913" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="428.442995" y="27.000321" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="429.811693" y="25.971729" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="431.180392" y="24.943137" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mb17681182d" x="432.549091" y="23.914545" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+    </g>
+   </g>
+   <g id="line2d_14">
+    <path d="M 432.549091 299.32 
+L 432.549091 10.8 
+" clip-path="url(#p8aa07be5e1)" style="fill: none; stroke-dasharray: 5.55,2.4; stroke-dashoffset: 0; stroke: #000000; stroke-width: 1.5"/>
+   </g>
+   <g id="line2d_15">
+    <path d="M 344.952371 299.32 
+L 344.952371 10.8 
+" clip-path="url(#p8aa07be5e1)" style="fill: none; stroke-dasharray: 5.55,2.4; stroke-dashoffset: 0; stroke: #000000; stroke-width: 1.5"/>
+   </g>
+   <g id="line2d_16">
+    <path d="M 257.355651 299.32 
+L 257.355651 10.8 
+" clip-path="url(#p8aa07be5e1)" style="fill: none; stroke-dasharray: 5.55,2.4; stroke-dashoffset: 0; stroke: #000000; stroke-width: 1.5"/>
+   </g>
+   <g id="line2d_17">
+    <path d="M 169.75893 299.32 
+L 169.75893 10.8 
+" clip-path="url(#p8aa07be5e1)" style="fill: none; stroke-dasharray: 5.55,2.4; stroke-dashoffset: 0; stroke: #000000; stroke-width: 1.5"/>
+   </g>
+   <g id="line2d_18">
+    <path d="M 83.530909 299.32 
+L 83.530909 10.8 
+" clip-path="url(#p8aa07be5e1)" style="fill: none; stroke-dasharray: 5.55,2.4; stroke-dashoffset: 0; stroke: #000000; stroke-width: 1.5"/>
+   </g>
+   <g id="patch_3">
+    <path d="M 66.08 299.32 
+L 66.08 10.8 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_4">
+    <path d="M 450 299.32 
+L 450 10.8 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_5">
+    <path d="M 66.08 299.32 
+L 450 299.32 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_6">
+    <path d="M 66.08 10.8 
+L 450 10.8 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="legend_1">
+    <g id="patch_7">
+     <path d="M 201.82125 294.32 
+L 314.25875 294.32 
+Q 316.25875 294.32 316.25875 292.32 
+L 316.25875 255.447 
+Q 316.25875 253.447 314.25875 253.447 
+L 201.82125 253.447 
+Q 199.82125 253.447 199.82125 255.447 
+L 199.82125 292.32 
+Q 199.82125 294.32 201.82125 294.32 
+z
+" style="fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter"/>
+    </g>
+    <g id="line2d_19">
+     <g>
+      <use xlink:href="#m6676b56e61" x="213.82125" y="264.88875" style="fill-opacity: 0; stroke: #440154"/>
+     </g>
+    </g>
+    <g id="text_14">
+     <!-- Theoretical -->
+     <g transform="translate(231.82125 268.38875) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-54" d="M -19 4666 
+L 3928 4666 
+L 3928 4134 
+L 2272 4134 
+L 2272 0 
+L 1638 0 
+L 1638 4134 
+L -19 4134 
+L -19 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6f" d="M 1959 3097 
+Q 1497 3097 1228 2736 
+Q 959 2375 959 1747 
+Q 959 1119 1226 758 
+Q 1494 397 1959 397 
+Q 2419 397 2687 759 
+Q 2956 1122 2956 1747 
+Q 2956 2369 2687 2733 
+Q 2419 3097 1959 3097 
+z
+M 1959 3584 
+Q 2709 3584 3137 3096 
+Q 3566 2609 3566 1747 
+Q 3566 888 3137 398 
+Q 2709 -91 1959 -91 
+Q 1206 -91 779 398 
+Q 353 888 353 1747 
+Q 353 2609 779 3096 
+Q 1206 3584 1959 3584 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-54"/>
+      <use xlink:href="#DejaVuSans-68" x="61.083984"/>
+      <use xlink:href="#DejaVuSans-65" x="124.462891"/>
+      <use xlink:href="#DejaVuSans-6f" x="185.986328"/>
+      <use xlink:href="#DejaVuSans-72" x="247.167969"/>
+      <use xlink:href="#DejaVuSans-65" x="286.03125"/>
+      <use xlink:href="#DejaVuSans-74" x="347.554688"/>
+      <use xlink:href="#DejaVuSans-69" x="386.763672"/>
+      <use xlink:href="#DejaVuSans-63" x="414.546875"/>
+      <use xlink:href="#DejaVuSans-61" x="469.527344"/>
+      <use xlink:href="#DejaVuSans-6c" x="530.806641"/>
+     </g>
+    </g>
+    <g id="line2d_20">
+     <g>
+      <use xlink:href="#mb17681182d" x="213.82125" y="283.82525" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     </g>
+    </g>
+    <g id="text_15">
+     <!-- Effective -->
+     <g transform="translate(231.82125 287.32525) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-45" d="M 628 4666 
+L 3578 4666 
+L 3578 4134 
+L 1259 4134 
+L 1259 2753 
+L 3481 2753 
+L 3481 2222 
+L 1259 2222 
+L 1259 531 
+L 3634 531 
+L 3634 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-66" d="M 2375 4863 
+L 2375 4384 
+L 1825 4384 
+Q 1516 4384 1395 4259 
+Q 1275 4134 1275 3809 
+L 1275 3500 
+L 2222 3500 
+L 2222 3053 
+L 1275 3053 
+L 1275 0 
+L 697 0 
+L 697 3053 
+L 147 3053 
+L 147 3500 
+L 697 3500 
+L 697 3744 
+Q 697 4328 969 4595 
+Q 1241 4863 1831 4863 
+L 2375 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-76" d="M 191 3500 
+L 800 3500 
+L 1894 563 
+L 2988 3500 
+L 3597 3500 
+L 2284 0 
+L 1503 0 
+L 191 3500 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-45"/>
+      <use xlink:href="#DejaVuSans-66" x="63.183594"/>
+      <use xlink:href="#DejaVuSans-66" x="98.388672"/>
+      <use xlink:href="#DejaVuSans-65" x="133.59375"/>
+      <use xlink:href="#DejaVuSans-63" x="195.117188"/>
+      <use xlink:href="#DejaVuSans-74" x="250.097656"/>
+      <use xlink:href="#DejaVuSans-69" x="289.306641"/>
+      <use xlink:href="#DejaVuSans-76" x="317.089844"/>
+      <use xlink:href="#DejaVuSans-65" x="376.269531"/>
+     </g>
+    </g>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="p8aa07be5e1">
+   <rect x="66.08" y="10.8" width="383.92" height="288.52"/>
+  </clipPath>
+ </defs>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.png b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.png
new file mode 100644
index 0000000000..ab057f3cd9
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.png differ
diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.svg b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.svg
new file mode 100644
index 0000000000..f98e9bc4a6
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflictrate.svg
@@ -0,0 +1,1050 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns:xlink="http://www.w3.org/1999/xlink" width="460.8pt" height="345.6pt" viewBox="0 0 460.8 345.6" xmlns="http://www.w3.org/2000/svg" version="1.1">
+ <metadata>
+  <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+   <cc:Work>
+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+    <dc:date>2023-08-21T11:43:04.336525</dc:date>
+    <dc:format>image/svg+xml</dc:format>
+    <dc:creator>
+     <cc:Agent>
+      <dc:title>Matplotlib v3.7.1, https://matplotlib.org/</dc:title>
+     </cc:Agent>
+    </dc:creator>
+   </cc:Work>
+  </rdf:RDF>
+ </metadata>
+ <defs>
+  <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="M 0 345.6 
+L 460.8 345.6 
+L 460.8 0 
+L 0 0 
+z
+" style="fill: #ffffff"/>
+  </g>
+  <g id="axes_1">
+   <g id="patch_2">
+    <path d="M 58.43 299.32 
+L 450 299.32 
+L 450 37.80224 
+L 58.43 37.80224 
+z
+" style="fill: #ffffff"/>
+   </g>
+   <g id="matplotlib.axis_1">
+    <g id="xtick_1">
+     <g id="line2d_1">
+      <defs>
+       <path id="mb788961b6a" d="M 0 0 
+L 0 3.5 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#mb788961b6a" x="58.43" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_1">
+      <!-- 0 -->
+      <g transform="translate(54.6125 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-30" d="M 2034 4250 
+Q 1547 4250 1301 3770 
+Q 1056 3291 1056 2328 
+Q 1056 1369 1301 889 
+Q 1547 409 2034 409 
+Q 2525 409 2770 889 
+Q 3016 1369 3016 2328 
+Q 3016 3291 2770 3770 
+Q 2525 4250 2034 4250 
+z
+M 2034 4750 
+Q 2819 4750 3233 4129 
+Q 3647 3509 3647 2328 
+Q 3647 1150 3233 529 
+Q 2819 -91 2034 -91 
+Q 1250 -91 836 529 
+Q 422 1150 422 2328 
+Q 422 3509 836 4129 
+Q 1250 4750 2034 4750 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-30"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_2">
+     <g id="line2d_2">
+      <g>
+       <use xlink:href="#mb788961b6a" x="116.742733" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_2">
+      <!-- 10 -->
+      <g transform="translate(109.107733 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-31" d="M 794 531 
+L 1825 531 
+L 1825 4091 
+L 703 3866 
+L 703 4441 
+L 1819 4666 
+L 2450 4666 
+L 2450 531 
+L 3481 531 
+L 3481 0 
+L 794 0 
+L 794 531 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_3">
+     <g id="line2d_3">
+      <g>
+       <use xlink:href="#mb788961b6a" x="175.055465" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_3">
+      <!-- 20 -->
+      <g transform="translate(167.420465 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-32" d="M 1228 531 
+L 3431 531 
+L 3431 0 
+L 469 0 
+L 469 531 
+Q 828 903 1448 1529 
+Q 2069 2156 2228 2338 
+Q 2531 2678 2651 2914 
+Q 2772 3150 2772 3378 
+Q 2772 3750 2511 3984 
+Q 2250 4219 1831 4219 
+Q 1534 4219 1204 4116 
+Q 875 4013 500 3803 
+L 500 4441 
+Q 881 4594 1212 4672 
+Q 1544 4750 1819 4750 
+Q 2544 4750 2975 4387 
+Q 3406 4025 3406 3419 
+Q 3406 3131 3298 2873 
+Q 3191 2616 2906 2266 
+Q 2828 2175 2409 1742 
+Q 1991 1309 1228 531 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_4">
+     <g id="line2d_4">
+      <g>
+       <use xlink:href="#mb788961b6a" x="233.368198" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_4">
+      <!-- 30 -->
+      <g transform="translate(225.733198 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-33" d="M 2597 2516 
+Q 3050 2419 3304 2112 
+Q 3559 1806 3559 1356 
+Q 3559 666 3084 287 
+Q 2609 -91 1734 -91 
+Q 1441 -91 1130 -33 
+Q 819 25 488 141 
+L 488 750 
+Q 750 597 1062 519 
+Q 1375 441 1716 441 
+Q 2309 441 2620 675 
+Q 2931 909 2931 1356 
+Q 2931 1769 2642 2001 
+Q 2353 2234 1838 2234 
+L 1294 2234 
+L 1294 2753 
+L 1863 2753 
+Q 2328 2753 2575 2939 
+Q 2822 3125 2822 3475 
+Q 2822 3834 2567 4026 
+Q 2313 4219 1838 4219 
+Q 1578 4219 1281 4162 
+Q 984 4106 628 3988 
+L 628 4550 
+Q 988 4650 1302 4700 
+Q 1616 4750 1894 4750 
+Q 2613 4750 3031 4423 
+Q 3450 4097 3450 3541 
+Q 3450 3153 3228 2886 
+Q 3006 2619 2597 2516 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-33"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_5">
+     <g id="line2d_5">
+      <g>
+       <use xlink:href="#mb788961b6a" x="291.680931" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_5">
+      <!-- 40 -->
+      <g transform="translate(284.045931 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-34" d="M 2419 4116 
+L 825 1625 
+L 2419 1625 
+L 2419 4116 
+z
+M 2253 4666 
+L 3047 4666 
+L 3047 1625 
+L 3713 1625 
+L 3713 1100 
+L 3047 1100 
+L 3047 0 
+L 2419 0 
+L 2419 1100 
+L 313 1100 
+L 313 1709 
+L 2253 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-34"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_6">
+     <g id="line2d_6">
+      <g>
+       <use xlink:href="#mb788961b6a" x="349.993663" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_6">
+      <!-- 50 -->
+      <g transform="translate(342.358663 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-35" d="M 691 4666 
+L 3169 4666 
+L 3169 4134 
+L 1269 4134 
+L 1269 2991 
+Q 1406 3038 1543 3061 
+Q 1681 3084 1819 3084 
+Q 2600 3084 3056 2656 
+Q 3513 2228 3513 1497 
+Q 3513 744 3044 326 
+Q 2575 -91 1722 -91 
+Q 1428 -91 1123 -41 
+Q 819 9 494 109 
+L 494 744 
+Q 775 591 1075 516 
+Q 1375 441 1709 441 
+Q 2250 441 2565 725 
+Q 2881 1009 2881 1497 
+Q 2881 1984 2565 2268 
+Q 2250 2553 1709 2553 
+Q 1456 2553 1204 2497 
+Q 953 2441 691 2322 
+L 691 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-35"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_7">
+     <g id="line2d_7">
+      <g>
+       <use xlink:href="#mb788961b6a" x="408.306396" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_7">
+      <!-- 60 -->
+      <g transform="translate(400.671396 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-36" d="M 2113 2584 
+Q 1688 2584 1439 2293 
+Q 1191 2003 1191 1497 
+Q 1191 994 1439 701 
+Q 1688 409 2113 409 
+Q 2538 409 2786 701 
+Q 3034 994 3034 1497 
+Q 3034 2003 2786 2293 
+Q 2538 2584 2113 2584 
+z
+M 3366 4563 
+L 3366 3988 
+Q 3128 4100 2886 4159 
+Q 2644 4219 2406 4219 
+Q 1781 4219 1451 3797 
+Q 1122 3375 1075 2522 
+Q 1259 2794 1537 2939 
+Q 1816 3084 2150 3084 
+Q 2853 3084 3261 2657 
+Q 3669 2231 3669 1497 
+Q 3669 778 3244 343 
+Q 2819 -91 2113 -91 
+Q 1303 -91 875 529 
+Q 447 1150 447 2328 
+Q 447 3434 972 4092 
+Q 1497 4750 2381 4750 
+Q 2619 4750 2861 4703 
+Q 3103 4656 3366 4563 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-36"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_8">
+     <!-- Kernel Dispatch [N] -->
+     <g transform="translate(184.391875 332.8755) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-4b" d="M 628 4666 
+L 1259 4666 
+L 1259 2694 
+L 3353 4666 
+L 4166 4666 
+L 1850 2491 
+L 4331 0 
+L 3500 0 
+L 1259 2247 
+L 1259 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-65" d="M 3597 1894 
+L 3597 1613 
+L 953 1613 
+Q 991 1019 1311 708 
+Q 1631 397 2203 397 
+Q 2534 397 2845 478 
+Q 3156 559 3463 722 
+L 3463 178 
+Q 3153 47 2828 -22 
+Q 2503 -91 2169 -91 
+Q 1331 -91 842 396 
+Q 353 884 353 1716 
+Q 353 2575 817 3079 
+Q 1281 3584 2069 3584 
+Q 2775 3584 3186 3129 
+Q 3597 2675 3597 1894 
+z
+M 3022 2063 
+Q 3016 2534 2758 2815 
+Q 2500 3097 2075 3097 
+Q 1594 3097 1305 2825 
+Q 1016 2553 972 2059 
+L 3022 2063 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-72" d="M 2631 2963 
+Q 2534 3019 2420 3045 
+Q 2306 3072 2169 3072 
+Q 1681 3072 1420 2755 
+Q 1159 2438 1159 1844 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1341 3275 1631 3429 
+Q 1922 3584 2338 3584 
+Q 2397 3584 2469 3576 
+Q 2541 3569 2628 3553 
+L 2631 2963 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6e" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6c" d="M 603 4863 
+L 1178 4863 
+L 1178 0 
+L 603 0 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-20" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-44" d="M 1259 4147 
+L 1259 519 
+L 2022 519 
+Q 2988 519 3436 956 
+Q 3884 1394 3884 2338 
+Q 3884 3275 3436 3711 
+Q 2988 4147 2022 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 1925 4666 
+Q 3281 4666 3915 4102 
+Q 4550 3538 4550 2338 
+Q 4550 1131 3912 565 
+Q 3275 0 1925 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-69" d="M 603 3500 
+L 1178 3500 
+L 1178 0 
+L 603 0 
+L 603 3500 
+z
+M 603 4863 
+L 1178 4863 
+L 1178 4134 
+L 603 4134 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-73" d="M 2834 3397 
+L 2834 2853 
+Q 2591 2978 2328 3040 
+Q 2066 3103 1784 3103 
+Q 1356 3103 1142 2972 
+Q 928 2841 928 2578 
+Q 928 2378 1081 2264 
+Q 1234 2150 1697 2047 
+L 1894 2003 
+Q 2506 1872 2764 1633 
+Q 3022 1394 3022 966 
+Q 3022 478 2636 193 
+Q 2250 -91 1575 -91 
+Q 1294 -91 989 -36 
+Q 684 19 347 128 
+L 347 722 
+Q 666 556 975 473 
+Q 1284 391 1588 391 
+Q 1994 391 2212 530 
+Q 2431 669 2431 922 
+Q 2431 1156 2273 1281 
+Q 2116 1406 1581 1522 
+L 1381 1569 
+Q 847 1681 609 1914 
+Q 372 2147 372 2553 
+Q 372 3047 722 3315 
+Q 1072 3584 1716 3584 
+Q 2034 3584 2315 3537 
+Q 2597 3491 2834 3397 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-70" d="M 1159 525 
+L 1159 -1331 
+L 581 -1331 
+L 581 3500 
+L 1159 3500 
+L 1159 2969 
+Q 1341 3281 1617 3432 
+Q 1894 3584 2278 3584 
+Q 2916 3584 3314 3078 
+Q 3713 2572 3713 1747 
+Q 3713 922 3314 415 
+Q 2916 -91 2278 -91 
+Q 1894 -91 1617 61 
+Q 1341 213 1159 525 
+z
+M 3116 1747 
+Q 3116 2381 2855 2742 
+Q 2594 3103 2138 3103 
+Q 1681 3103 1420 2742 
+Q 1159 2381 1159 1747 
+Q 1159 1113 1420 752 
+Q 1681 391 2138 391 
+Q 2594 391 2855 752 
+Q 3116 1113 3116 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-61" d="M 2194 1759 
+Q 1497 1759 1228 1600 
+Q 959 1441 959 1056 
+Q 959 750 1161 570 
+Q 1363 391 1709 391 
+Q 2188 391 2477 730 
+Q 2766 1069 2766 1631 
+L 2766 1759 
+L 2194 1759 
+z
+M 3341 1997 
+L 3341 0 
+L 2766 0 
+L 2766 531 
+Q 2569 213 2275 61 
+Q 1981 -91 1556 -91 
+Q 1019 -91 701 211 
+Q 384 513 384 1019 
+Q 384 1609 779 1909 
+Q 1175 2209 1959 2209 
+L 2766 2209 
+L 2766 2266 
+Q 2766 2663 2505 2880 
+Q 2244 3097 1772 3097 
+Q 1472 3097 1187 3025 
+Q 903 2953 641 2809 
+L 641 3341 
+Q 956 3463 1253 3523 
+Q 1550 3584 1831 3584 
+Q 2591 3584 2966 3190 
+Q 3341 2797 3341 1997 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-74" d="M 1172 4494 
+L 1172 3500 
+L 2356 3500 
+L 2356 3053 
+L 1172 3053 
+L 1172 1153 
+Q 1172 725 1289 603 
+Q 1406 481 1766 481 
+L 2356 481 
+L 2356 0 
+L 1766 0 
+Q 1100 0 847 248 
+Q 594 497 594 1153 
+L 594 3053 
+L 172 3053 
+L 172 3500 
+L 594 3500 
+L 594 4494 
+L 1172 4494 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-63" d="M 3122 3366 
+L 3122 2828 
+Q 2878 2963 2633 3030 
+Q 2388 3097 2138 3097 
+Q 1578 3097 1268 2742 
+Q 959 2388 959 1747 
+Q 959 1106 1268 751 
+Q 1578 397 2138 397 
+Q 2388 397 2633 464 
+Q 2878 531 3122 666 
+L 3122 134 
+Q 2881 22 2623 -34 
+Q 2366 -91 2075 -91 
+Q 1284 -91 818 406 
+Q 353 903 353 1747 
+Q 353 2603 823 3093 
+Q 1294 3584 2113 3584 
+Q 2378 3584 2631 3529 
+Q 2884 3475 3122 3366 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-68" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 4863 
+L 1159 4863 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-5b" d="M 550 4863 
+L 1875 4863 
+L 1875 4416 
+L 1125 4416 
+L 1125 -397 
+L 1875 -397 
+L 1875 -844 
+L 550 -844 
+L 550 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-4e" d="M 628 4666 
+L 1478 4666 
+L 3547 763 
+L 3547 4666 
+L 4159 4666 
+L 4159 0 
+L 3309 0 
+L 1241 3903 
+L 1241 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-5d" d="M 1947 4863 
+L 1947 -844 
+L 622 -844 
+L 622 -397 
+L 1369 -397 
+L 1369 4416 
+L 622 4416 
+L 622 4863 
+L 1947 4863 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-4b"/>
+      <use xlink:href="#DejaVuSans-65" x="60.576172"/>
+      <use xlink:href="#DejaVuSans-72" x="122.099609"/>
+      <use xlink:href="#DejaVuSans-6e" x="161.462891"/>
+      <use xlink:href="#DejaVuSans-65" x="224.841797"/>
+      <use xlink:href="#DejaVuSans-6c" x="286.365234"/>
+      <use xlink:href="#DejaVuSans-20" x="314.148438"/>
+      <use xlink:href="#DejaVuSans-44" x="345.935547"/>
+      <use xlink:href="#DejaVuSans-69" x="422.9375"/>
+      <use xlink:href="#DejaVuSans-73" x="450.720703"/>
+      <use xlink:href="#DejaVuSans-70" x="502.820312"/>
+      <use xlink:href="#DejaVuSans-61" x="566.296875"/>
+      <use xlink:href="#DejaVuSans-74" x="627.576172"/>
+      <use xlink:href="#DejaVuSans-63" x="666.785156"/>
+      <use xlink:href="#DejaVuSans-68" x="721.765625"/>
+      <use xlink:href="#DejaVuSans-20" x="785.144531"/>
+      <use xlink:href="#DejaVuSans-5b" x="816.931641"/>
+      <use xlink:href="#DejaVuSans-4e" x="855.945312"/>
+      <use xlink:href="#DejaVuSans-5d" x="930.75"/>
+     </g>
+    </g>
+   </g>
+   <g id="matplotlib.axis_2">
+    <g id="ytick_1">
+     <g id="line2d_8">
+      <defs>
+       <path id="mc1e45fdb99" d="M 0 0 
+L -3.5 0 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#mc1e45fdb99" x="58.43" y="287.432829" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_9">
+      <!-- 0 -->
+      <g transform="translate(43.795 291.991892) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_2">
+     <g id="line2d_9">
+      <g>
+       <use xlink:href="#mc1e45fdb99" x="58.43" y="238.350317" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_10">
+      <!-- 20 -->
+      <g transform="translate(36.16 242.909379) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_3">
+     <g id="line2d_10">
+      <g>
+       <use xlink:href="#mc1e45fdb99" x="58.43" y="189.267805" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_11">
+      <!-- 40 -->
+      <g transform="translate(36.16 193.826867) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-34"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_4">
+     <g id="line2d_11">
+      <g>
+       <use xlink:href="#mc1e45fdb99" x="58.43" y="140.185293" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_12">
+      <!-- 60 -->
+      <g transform="translate(36.16 144.744355) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-36"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_5">
+     <g id="line2d_12">
+      <g>
+       <use xlink:href="#mc1e45fdb99" x="58.43" y="91.102781" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_13">
+      <!-- 80 -->
+      <g transform="translate(36.16 95.661843) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-38" d="M 2034 2216 
+Q 1584 2216 1326 1975 
+Q 1069 1734 1069 1313 
+Q 1069 891 1326 650 
+Q 1584 409 2034 409 
+Q 2484 409 2743 651 
+Q 3003 894 3003 1313 
+Q 3003 1734 2745 1975 
+Q 2488 2216 2034 2216 
+z
+M 1403 2484 
+Q 997 2584 770 2862 
+Q 544 3141 544 3541 
+Q 544 4100 942 4425 
+Q 1341 4750 2034 4750 
+Q 2731 4750 3128 4425 
+Q 3525 4100 3525 3541 
+Q 3525 3141 3298 2862 
+Q 3072 2584 2669 2484 
+Q 3125 2378 3379 2068 
+Q 3634 1759 3634 1313 
+Q 3634 634 3220 271 
+Q 2806 -91 2034 -91 
+Q 1263 -91 848 271 
+Q 434 634 434 1313 
+Q 434 1759 690 2068 
+Q 947 2378 1403 2484 
+z
+M 1172 3481 
+Q 1172 3119 1398 2916 
+Q 1625 2713 2034 2713 
+Q 2441 2713 2670 2916 
+Q 2900 3119 2900 3481 
+Q 2900 3844 2670 4047 
+Q 2441 4250 2034 4250 
+Q 1625 4250 1398 4047 
+Q 1172 3844 1172 3481 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-38"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_6">
+     <g id="line2d_13">
+      <g>
+       <use xlink:href="#mc1e45fdb99" x="58.43" y="42.020268" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_14">
+      <!-- 100 -->
+      <g transform="translate(28.525 46.579331) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_15">
+     <!-- Percent -->
+     <g transform="translate(21.53025 195.64437) rotate(-90) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-50" d="M 1259 4147 
+L 1259 2394 
+L 2053 2394 
+Q 2494 2394 2734 2622 
+Q 2975 2850 2975 3272 
+Q 2975 3691 2734 3919 
+Q 2494 4147 2053 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 2053 4666 
+Q 2838 4666 3239 4311 
+Q 3641 3956 3641 3272 
+Q 3641 2581 3239 2228 
+Q 2838 1875 2053 1875 
+L 1259 1875 
+L 1259 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-50"/>
+      <use xlink:href="#DejaVuSans-65" x="56.677734"/>
+      <use xlink:href="#DejaVuSans-72" x="118.201172"/>
+      <use xlink:href="#DejaVuSans-63" x="157.064453"/>
+      <use xlink:href="#DejaVuSans-65" x="212.044922"/>
+      <use xlink:href="#DejaVuSans-6e" x="273.568359"/>
+      <use xlink:href="#DejaVuSans-74" x="336.947266"/>
+     </g>
+    </g>
+   </g>
+   <g id="line2d_14">
+    <defs>
+     <path id="mf7e5d00153" d="M -5.5 -0 
+L 5.5 5.5 
+L 5.5 -5.5 
+z
+" style="stroke: #21918c; stroke-linejoin: miter"/>
+    </defs>
+    <g clip-path="url(#pae68a78108)">
+     <use xlink:href="#mf7e5d00153" x="64.261273" y="287.432829" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="70.092547" y="283.598258" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="75.92382" y="279.763687" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="81.755093" y="275.929115" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="87.586366" y="272.094544" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="93.41764" y="268.259973" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="99.248913" y="264.425402" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="105.080186" y="260.59083" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="110.911459" y="256.756259" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="116.742733" y="252.921688" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="122.574006" y="249.087116" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="128.405279" y="245.252545" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="134.236552" y="241.417974" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="140.067826" y="237.583403" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="145.899099" y="233.748831" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="151.730372" y="229.91426" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="157.561646" y="226.079689" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="163.392919" y="222.245118" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="169.224192" y="218.410546" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="175.055465" y="214.575975" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="180.886739" y="210.741404" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="186.718012" y="206.906833" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="192.549285" y="203.072261" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="198.380558" y="199.23769" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="204.211832" y="195.403119" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="210.043105" y="191.568548" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="215.874378" y="187.733976" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="221.705652" y="183.899405" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="227.536925" y="180.064834" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="233.368198" y="176.230263" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="239.199471" y="172.395691" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="245.030745" y="168.56112" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="250.862018" y="168.56112" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="256.693291" y="164.726549" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="262.524564" y="160.891977" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="268.355838" y="157.057406" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="274.187111" y="153.222835" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="280.018384" y="149.388264" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="285.849657" y="145.553692" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="291.680931" y="141.719121" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="297.512204" y="137.88455" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="303.343477" y="134.049979" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="309.174751" y="130.215407" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="315.006024" y="126.380836" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="320.837297" y="122.546265" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="326.66857" y="118.711694" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="332.499844" y="114.877122" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="338.331117" y="111.042551" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="344.16239" y="107.20798" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="349.993663" y="103.373409" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="355.824937" y="99.538837" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="361.65621" y="95.704266" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="367.487483" y="91.869695" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="373.318757" y="88.035124" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="379.15003" y="84.200552" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="384.981303" y="80.365981" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="390.812576" y="76.53141" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="396.64385" y="72.696838" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="402.475123" y="68.862267" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="408.306396" y="65.027696" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="414.137669" y="61.193125" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="419.968943" y="57.358553" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="425.800216" y="53.523982" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     <use xlink:href="#mf7e5d00153" x="431.631489" y="49.689411" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+    </g>
+   </g>
+   <g id="patch_3">
+    <path d="M 58.43 299.32 
+L 58.43 37.80224 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_4">
+    <path d="M 450 299.32 
+L 450 37.80224 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_5">
+    <path d="M 58.43 299.32 
+L 450 299.32 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_6">
+    <path d="M 58.43 37.80224 
+L 450 37.80224 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="legend_1">
+    <g id="patch_7">
+     <path d="M 60.43 32.571885 
+L 448 32.571885 
+Q 450 32.571885 450 30.571885 
+L 450 12.635385 
+Q 450 10.635385 448 10.635385 
+L 60.43 10.635385 
+Q 58.43 10.635385 58.43 12.635385 
+L 58.43 30.571885 
+Q 58.43 32.571885 60.43 32.571885 
+z
+" style="fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter"/>
+    </g>
+    <g id="line2d_15">
+     <g>
+      <use xlink:href="#mf7e5d00153" x="72.43" y="22.077135" style="fill-opacity: 0; stroke: #21918c; stroke-linejoin: miter"/>
+     </g>
+    </g>
+    <g id="text_16">
+     <!-- Conflict Rate -->
+     <g transform="translate(90.43 25.577135) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-43" d="M 4122 4306 
+L 4122 3641 
+Q 3803 3938 3442 4084 
+Q 3081 4231 2675 4231 
+Q 1875 4231 1450 3742 
+Q 1025 3253 1025 2328 
+Q 1025 1406 1450 917 
+Q 1875 428 2675 428 
+Q 3081 428 3442 575 
+Q 3803 722 4122 1019 
+L 4122 359 
+Q 3791 134 3420 21 
+Q 3050 -91 2638 -91 
+Q 1578 -91 968 557 
+Q 359 1206 359 2328 
+Q 359 3453 968 4101 
+Q 1578 4750 2638 4750 
+Q 3056 4750 3426 4639 
+Q 3797 4528 4122 4306 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6f" d="M 1959 3097 
+Q 1497 3097 1228 2736 
+Q 959 2375 959 1747 
+Q 959 1119 1226 758 
+Q 1494 397 1959 397 
+Q 2419 397 2687 759 
+Q 2956 1122 2956 1747 
+Q 2956 2369 2687 2733 
+Q 2419 3097 1959 3097 
+z
+M 1959 3584 
+Q 2709 3584 3137 3096 
+Q 3566 2609 3566 1747 
+Q 3566 888 3137 398 
+Q 2709 -91 1959 -91 
+Q 1206 -91 779 398 
+Q 353 888 353 1747 
+Q 353 2609 779 3096 
+Q 1206 3584 1959 3584 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-66" d="M 2375 4863 
+L 2375 4384 
+L 1825 4384 
+Q 1516 4384 1395 4259 
+Q 1275 4134 1275 3809 
+L 1275 3500 
+L 2222 3500 
+L 2222 3053 
+L 1275 3053 
+L 1275 0 
+L 697 0 
+L 697 3053 
+L 147 3053 
+L 147 3500 
+L 697 3500 
+L 697 3744 
+Q 697 4328 969 4595 
+Q 1241 4863 1831 4863 
+L 2375 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-52" d="M 2841 2188 
+Q 3044 2119 3236 1894 
+Q 3428 1669 3622 1275 
+L 4263 0 
+L 3584 0 
+L 2988 1197 
+Q 2756 1666 2539 1819 
+Q 2322 1972 1947 1972 
+L 1259 1972 
+L 1259 0 
+L 628 0 
+L 628 4666 
+L 2053 4666 
+Q 2853 4666 3247 4331 
+Q 3641 3997 3641 3322 
+Q 3641 2881 3436 2590 
+Q 3231 2300 2841 2188 
+z
+M 1259 4147 
+L 1259 2491 
+L 2053 2491 
+Q 2509 2491 2742 2702 
+Q 2975 2913 2975 3322 
+Q 2975 3731 2742 3939 
+Q 2509 4147 2053 4147 
+L 1259 4147 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-43"/>
+      <use xlink:href="#DejaVuSans-6f" x="69.824219"/>
+      <use xlink:href="#DejaVuSans-6e" x="131.005859"/>
+      <use xlink:href="#DejaVuSans-66" x="194.384766"/>
+      <use xlink:href="#DejaVuSans-6c" x="229.589844"/>
+      <use xlink:href="#DejaVuSans-69" x="257.373047"/>
+      <use xlink:href="#DejaVuSans-63" x="285.15625"/>
+      <use xlink:href="#DejaVuSans-74" x="340.136719"/>
+      <use xlink:href="#DejaVuSans-20" x="379.345703"/>
+      <use xlink:href="#DejaVuSans-52" x="411.132812"/>
+      <use xlink:href="#DejaVuSans-61" x="478.365234"/>
+      <use xlink:href="#DejaVuSans-74" x="539.644531"/>
+      <use xlink:href="#DejaVuSans-65" x="578.853516"/>
+     </g>
+    </g>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="pae68a78108">
+   <rect x="58.43" y="37.80224" width="391.57" height="261.51776"/>
+  </clipPath>
+ </defs>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.png b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.png
new file mode 100644
index 0000000000..77c0938581
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.png differ
diff --git a/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.svg b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.svg
new file mode 100644
index 0000000000..f4a2f17d14
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/data/profiling-by-example/ldsconflicts.svg
@@ -0,0 +1,1145 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns:xlink="http://www.w3.org/1999/xlink" width="460.8pt" height="345.6pt" viewBox="0 0 460.8 345.6" xmlns="http://www.w3.org/2000/svg" version="1.1">
+ <metadata>
+  <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+   <cc:Work>
+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+    <dc:date>2023-08-17T18:14:36.907658</dc:date>
+    <dc:format>image/svg+xml</dc:format>
+    <dc:creator>
+     <cc:Agent>
+      <dc:title>Matplotlib v3.7.1, https://matplotlib.org/</dc:title>
+     </cc:Agent>
+    </dc:creator>
+   </cc:Work>
+  </rdf:RDF>
+ </metadata>
+ <defs>
+  <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="M 0 345.6 
+L 460.8 345.6 
+L 460.8 0 
+L 0 0 
+z
+" style="fill: #ffffff"/>
+  </g>
+  <g id="axes_1">
+   <g id="patch_2">
+    <path d="M 50.78 299.32 
+L 450 299.32 
+L 450 75.16224 
+L 50.78 75.16224 
+z
+" style="fill: #ffffff"/>
+   </g>
+   <g id="matplotlib.axis_1">
+    <g id="xtick_1">
+     <g id="line2d_1">
+      <defs>
+       <path id="m52dc845b6f" d="M 0 0 
+L 0 3.5 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#m52dc845b6f" x="50.78" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_1">
+      <!-- 0.0 -->
+      <g transform="translate(41.238125 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-30" d="M 2034 4250 
+Q 1547 4250 1301 3770 
+Q 1056 3291 1056 2328 
+Q 1056 1369 1301 889 
+Q 1547 409 2034 409 
+Q 2525 409 2770 889 
+Q 3016 1369 3016 2328 
+Q 3016 3291 2770 3770 
+Q 2525 4250 2034 4250 
+z
+M 2034 4750 
+Q 2819 4750 3233 4129 
+Q 3647 3509 3647 2328 
+Q 3647 1150 3233 529 
+Q 2819 -91 2034 -91 
+Q 1250 -91 836 529 
+Q 422 1150 422 2328 
+Q 422 3509 836 4129 
+Q 1250 4750 2034 4750 
+z
+" transform="scale(0.015625)"/>
+        <path id="DejaVuSans-2e" d="M 684 794 
+L 1344 794 
+L 1344 0 
+L 684 0 
+L 684 794 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-30"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_2">
+     <g id="line2d_2">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="98.419618" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_2">
+      <!-- 2.5 -->
+      <g transform="translate(88.877743 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-32" d="M 1228 531 
+L 3431 531 
+L 3431 0 
+L 469 0 
+L 469 531 
+Q 828 903 1448 1529 
+Q 2069 2156 2228 2338 
+Q 2531 2678 2651 2914 
+Q 2772 3150 2772 3378 
+Q 2772 3750 2511 3984 
+Q 2250 4219 1831 4219 
+Q 1534 4219 1204 4116 
+Q 875 4013 500 3803 
+L 500 4441 
+Q 881 4594 1212 4672 
+Q 1544 4750 1819 4750 
+Q 2544 4750 2975 4387 
+Q 3406 4025 3406 3419 
+Q 3406 3131 3298 2873 
+Q 3191 2616 2906 2266 
+Q 2828 2175 2409 1742 
+Q 1991 1309 1228 531 
+z
+" transform="scale(0.015625)"/>
+        <path id="DejaVuSans-35" d="M 691 4666 
+L 3169 4666 
+L 3169 4134 
+L 1269 4134 
+L 1269 2991 
+Q 1406 3038 1543 3061 
+Q 1681 3084 1819 3084 
+Q 2600 3084 3056 2656 
+Q 3513 2228 3513 1497 
+Q 3513 744 3044 326 
+Q 2575 -91 1722 -91 
+Q 1428 -91 1123 -41 
+Q 819 9 494 109 
+L 494 744 
+Q 775 591 1075 516 
+Q 1375 441 1709 441 
+Q 2250 441 2565 725 
+Q 2881 1009 2881 1497 
+Q 2881 1984 2565 2268 
+Q 2250 2553 1709 2553 
+Q 1456 2553 1204 2497 
+Q 953 2441 691 2322 
+L 691 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-35" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_3">
+     <g id="line2d_3">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="146.059236" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_3">
+      <!-- 5.0 -->
+      <g transform="translate(136.517361 315.438125) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_4">
+     <g id="line2d_4">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="193.698854" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_4">
+      <!-- 7.5 -->
+      <g transform="translate(184.156979 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-37" d="M 525 4666 
+L 3525 4666 
+L 3525 4397 
+L 1831 0 
+L 1172 0 
+L 2766 4134 
+L 525 4134 
+L 525 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-37"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-35" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_5">
+     <g id="line2d_5">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="241.338473" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_5">
+      <!-- 10.0 -->
+      <g transform="translate(227.979098 315.438125) scale(0.12 -0.12)">
+       <defs>
+        <path id="DejaVuSans-31" d="M 794 531 
+L 1825 531 
+L 1825 4091 
+L 703 3866 
+L 703 4441 
+L 1819 4666 
+L 2450 4666 
+L 2450 531 
+L 3481 531 
+L 3481 0 
+L 794 0 
+L 794 531 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-2e" x="127.246094"/>
+       <use xlink:href="#DejaVuSans-30" x="159.033203"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_6">
+     <g id="line2d_6">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="288.978091" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_6">
+      <!-- 12.5 -->
+      <g transform="translate(275.618716 315.438125) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-32" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-2e" x="127.246094"/>
+       <use xlink:href="#DejaVuSans-35" x="159.033203"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_7">
+     <g id="line2d_7">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="336.617709" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_7">
+      <!-- 15.0 -->
+      <g transform="translate(323.258334 315.438125) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-35" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-2e" x="127.246094"/>
+       <use xlink:href="#DejaVuSans-30" x="159.033203"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_8">
+     <g id="line2d_8">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="384.257327" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_8">
+      <!-- 17.5 -->
+      <g transform="translate(370.897952 315.438125) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-37" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-2e" x="127.246094"/>
+       <use xlink:href="#DejaVuSans-35" x="159.033203"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_9">
+     <g id="line2d_9">
+      <g>
+       <use xlink:href="#m52dc845b6f" x="431.896945" y="299.32" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_9">
+      <!-- 20.0 -->
+      <g transform="translate(418.53757 315.438125) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-2e" x="127.246094"/>
+       <use xlink:href="#DejaVuSans-30" x="159.033203"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_10">
+     <!-- Kernel Dispatch [N] -->
+     <g transform="translate(180.566875 332.8755) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-4b" d="M 628 4666 
+L 1259 4666 
+L 1259 2694 
+L 3353 4666 
+L 4166 4666 
+L 1850 2491 
+L 4331 0 
+L 3500 0 
+L 1259 2247 
+L 1259 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-65" d="M 3597 1894 
+L 3597 1613 
+L 953 1613 
+Q 991 1019 1311 708 
+Q 1631 397 2203 397 
+Q 2534 397 2845 478 
+Q 3156 559 3463 722 
+L 3463 178 
+Q 3153 47 2828 -22 
+Q 2503 -91 2169 -91 
+Q 1331 -91 842 396 
+Q 353 884 353 1716 
+Q 353 2575 817 3079 
+Q 1281 3584 2069 3584 
+Q 2775 3584 3186 3129 
+Q 3597 2675 3597 1894 
+z
+M 3022 2063 
+Q 3016 2534 2758 2815 
+Q 2500 3097 2075 3097 
+Q 1594 3097 1305 2825 
+Q 1016 2553 972 2059 
+L 3022 2063 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-72" d="M 2631 2963 
+Q 2534 3019 2420 3045 
+Q 2306 3072 2169 3072 
+Q 1681 3072 1420 2755 
+Q 1159 2438 1159 1844 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1341 3275 1631 3429 
+Q 1922 3584 2338 3584 
+Q 2397 3584 2469 3576 
+Q 2541 3569 2628 3553 
+L 2631 2963 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6e" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6c" d="M 603 4863 
+L 1178 4863 
+L 1178 0 
+L 603 0 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-20" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-44" d="M 1259 4147 
+L 1259 519 
+L 2022 519 
+Q 2988 519 3436 956 
+Q 3884 1394 3884 2338 
+Q 3884 3275 3436 3711 
+Q 2988 4147 2022 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 1925 4666 
+Q 3281 4666 3915 4102 
+Q 4550 3538 4550 2338 
+Q 4550 1131 3912 565 
+Q 3275 0 1925 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-69" d="M 603 3500 
+L 1178 3500 
+L 1178 0 
+L 603 0 
+L 603 3500 
+z
+M 603 4863 
+L 1178 4863 
+L 1178 4134 
+L 603 4134 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-73" d="M 2834 3397 
+L 2834 2853 
+Q 2591 2978 2328 3040 
+Q 2066 3103 1784 3103 
+Q 1356 3103 1142 2972 
+Q 928 2841 928 2578 
+Q 928 2378 1081 2264 
+Q 1234 2150 1697 2047 
+L 1894 2003 
+Q 2506 1872 2764 1633 
+Q 3022 1394 3022 966 
+Q 3022 478 2636 193 
+Q 2250 -91 1575 -91 
+Q 1294 -91 989 -36 
+Q 684 19 347 128 
+L 347 722 
+Q 666 556 975 473 
+Q 1284 391 1588 391 
+Q 1994 391 2212 530 
+Q 2431 669 2431 922 
+Q 2431 1156 2273 1281 
+Q 2116 1406 1581 1522 
+L 1381 1569 
+Q 847 1681 609 1914 
+Q 372 2147 372 2553 
+Q 372 3047 722 3315 
+Q 1072 3584 1716 3584 
+Q 2034 3584 2315 3537 
+Q 2597 3491 2834 3397 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-70" d="M 1159 525 
+L 1159 -1331 
+L 581 -1331 
+L 581 3500 
+L 1159 3500 
+L 1159 2969 
+Q 1341 3281 1617 3432 
+Q 1894 3584 2278 3584 
+Q 2916 3584 3314 3078 
+Q 3713 2572 3713 1747 
+Q 3713 922 3314 415 
+Q 2916 -91 2278 -91 
+Q 1894 -91 1617 61 
+Q 1341 213 1159 525 
+z
+M 3116 1747 
+Q 3116 2381 2855 2742 
+Q 2594 3103 2138 3103 
+Q 1681 3103 1420 2742 
+Q 1159 2381 1159 1747 
+Q 1159 1113 1420 752 
+Q 1681 391 2138 391 
+Q 2594 391 2855 752 
+Q 3116 1113 3116 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-61" d="M 2194 1759 
+Q 1497 1759 1228 1600 
+Q 959 1441 959 1056 
+Q 959 750 1161 570 
+Q 1363 391 1709 391 
+Q 2188 391 2477 730 
+Q 2766 1069 2766 1631 
+L 2766 1759 
+L 2194 1759 
+z
+M 3341 1997 
+L 3341 0 
+L 2766 0 
+L 2766 531 
+Q 2569 213 2275 61 
+Q 1981 -91 1556 -91 
+Q 1019 -91 701 211 
+Q 384 513 384 1019 
+Q 384 1609 779 1909 
+Q 1175 2209 1959 2209 
+L 2766 2209 
+L 2766 2266 
+Q 2766 2663 2505 2880 
+Q 2244 3097 1772 3097 
+Q 1472 3097 1187 3025 
+Q 903 2953 641 2809 
+L 641 3341 
+Q 956 3463 1253 3523 
+Q 1550 3584 1831 3584 
+Q 2591 3584 2966 3190 
+Q 3341 2797 3341 1997 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-74" d="M 1172 4494 
+L 1172 3500 
+L 2356 3500 
+L 2356 3053 
+L 1172 3053 
+L 1172 1153 
+Q 1172 725 1289 603 
+Q 1406 481 1766 481 
+L 2356 481 
+L 2356 0 
+L 1766 0 
+Q 1100 0 847 248 
+Q 594 497 594 1153 
+L 594 3053 
+L 172 3053 
+L 172 3500 
+L 594 3500 
+L 594 4494 
+L 1172 4494 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-63" d="M 3122 3366 
+L 3122 2828 
+Q 2878 2963 2633 3030 
+Q 2388 3097 2138 3097 
+Q 1578 3097 1268 2742 
+Q 959 2388 959 1747 
+Q 959 1106 1268 751 
+Q 1578 397 2138 397 
+Q 2388 397 2633 464 
+Q 2878 531 3122 666 
+L 3122 134 
+Q 2881 22 2623 -34 
+Q 2366 -91 2075 -91 
+Q 1284 -91 818 406 
+Q 353 903 353 1747 
+Q 353 2603 823 3093 
+Q 1294 3584 2113 3584 
+Q 2378 3584 2631 3529 
+Q 2884 3475 3122 3366 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-68" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 4863 
+L 1159 4863 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-5b" d="M 550 4863 
+L 1875 4863 
+L 1875 4416 
+L 1125 4416 
+L 1125 -397 
+L 1875 -397 
+L 1875 -844 
+L 550 -844 
+L 550 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-4e" d="M 628 4666 
+L 1478 4666 
+L 3547 763 
+L 3547 4666 
+L 4159 4666 
+L 4159 0 
+L 3309 0 
+L 1241 3903 
+L 1241 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-5d" d="M 1947 4863 
+L 1947 -844 
+L 622 -844 
+L 622 -397 
+L 1369 -397 
+L 1369 4416 
+L 622 4416 
+L 622 4863 
+L 1947 4863 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-4b"/>
+      <use xlink:href="#DejaVuSans-65" x="60.576172"/>
+      <use xlink:href="#DejaVuSans-72" x="122.099609"/>
+      <use xlink:href="#DejaVuSans-6e" x="161.462891"/>
+      <use xlink:href="#DejaVuSans-65" x="224.841797"/>
+      <use xlink:href="#DejaVuSans-6c" x="286.365234"/>
+      <use xlink:href="#DejaVuSans-20" x="314.148438"/>
+      <use xlink:href="#DejaVuSans-44" x="345.935547"/>
+      <use xlink:href="#DejaVuSans-69" x="422.9375"/>
+      <use xlink:href="#DejaVuSans-73" x="450.720703"/>
+      <use xlink:href="#DejaVuSans-70" x="502.820312"/>
+      <use xlink:href="#DejaVuSans-61" x="566.296875"/>
+      <use xlink:href="#DejaVuSans-74" x="627.576172"/>
+      <use xlink:href="#DejaVuSans-63" x="666.785156"/>
+      <use xlink:href="#DejaVuSans-68" x="721.765625"/>
+      <use xlink:href="#DejaVuSans-20" x="785.144531"/>
+      <use xlink:href="#DejaVuSans-5b" x="816.931641"/>
+      <use xlink:href="#DejaVuSans-4e" x="855.945312"/>
+      <use xlink:href="#DejaVuSans-5d" x="930.75"/>
+     </g>
+    </g>
+   </g>
+   <g id="matplotlib.axis_2">
+    <g id="ytick_1">
+     <g id="line2d_10">
+      <defs>
+       <path id="ma58fb85e7a" d="M 0 0 
+L -3.5 0 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#ma58fb85e7a" x="50.78" y="289.131011" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_11">
+      <!-- 0 -->
+      <g transform="translate(36.145 293.690073) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_2">
+     <g id="line2d_11">
+      <g>
+       <use xlink:href="#ma58fb85e7a" x="50.78" y="240.612015" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_12">
+      <!-- 5 -->
+      <g transform="translate(36.145 245.171078) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_3">
+     <g id="line2d_12">
+      <g>
+       <use xlink:href="#ma58fb85e7a" x="50.78" y="192.09302" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_13">
+      <!-- 10 -->
+      <g transform="translate(28.51 196.652082) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_4">
+     <g id="line2d_13">
+      <g>
+       <use xlink:href="#ma58fb85e7a" x="50.78" y="143.574024" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_14">
+      <!-- 15 -->
+      <g transform="translate(28.51 148.133086) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-35" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_5">
+     <g id="line2d_14">
+      <g>
+       <use xlink:href="#ma58fb85e7a" x="50.78" y="95.055028" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_15">
+      <!-- 20 -->
+      <g transform="translate(28.51 99.614091) scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_16">
+     <!-- Cycles -->
+     <g transform="translate(21.51525 210.67037) rotate(-90) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-43" d="M 4122 4306 
+L 4122 3641 
+Q 3803 3938 3442 4084 
+Q 3081 4231 2675 4231 
+Q 1875 4231 1450 3742 
+Q 1025 3253 1025 2328 
+Q 1025 1406 1450 917 
+Q 1875 428 2675 428 
+Q 3081 428 3442 575 
+Q 3803 722 4122 1019 
+L 4122 359 
+Q 3791 134 3420 21 
+Q 3050 -91 2638 -91 
+Q 1578 -91 968 557 
+Q 359 1206 359 2328 
+Q 359 3453 968 4101 
+Q 1578 4750 2638 4750 
+Q 3056 4750 3426 4639 
+Q 3797 4528 4122 4306 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-79" d="M 2059 -325 
+Q 1816 -950 1584 -1140 
+Q 1353 -1331 966 -1331 
+L 506 -1331 
+L 506 -850 
+L 844 -850 
+Q 1081 -850 1212 -737 
+Q 1344 -625 1503 -206 
+L 1606 56 
+L 191 3500 
+L 800 3500 
+L 1894 763 
+L 2988 3500 
+L 3597 3500 
+L 2059 -325 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-43"/>
+      <use xlink:href="#DejaVuSans-79" x="69.824219"/>
+      <use xlink:href="#DejaVuSans-63" x="129.003906"/>
+      <use xlink:href="#DejaVuSans-6c" x="183.984375"/>
+      <use xlink:href="#DejaVuSans-65" x="211.767578"/>
+      <use xlink:href="#DejaVuSans-73" x="273.291016"/>
+     </g>
+    </g>
+   </g>
+   <g id="line2d_15">
+    <defs>
+     <path id="mecd52b7ca5" d="M 0 2 
+C 0.530406 2 1.03916 1.789267 1.414214 1.414214 
+C 1.789267 1.03916 2 0.530406 2 0 
+C 2 -0.530406 1.789267 -1.03916 1.414214 -1.414214 
+C 1.03916 -1.789267 0.530406 -2 0 -2 
+C -0.530406 -2 -1.03916 -1.789267 -1.414214 -1.414214 
+C -1.789267 -1.03916 -2 -0.530406 -2 0 
+C -2 0.530406 -1.789267 1.03916 -1.414214 1.414214 
+C -1.03916 1.789267 -0.530406 2 0 2 
+z
+" style="stroke: #440154"/>
+    </defs>
+    <g clip-path="url(#pbd5adeac22)">
+     <use xlink:href="#mecd52b7ca5" x="69.835847" y="289.131011" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="88.891695" y="279.427212" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="107.947542" y="269.723413" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="127.003389" y="260.019614" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="146.059236" y="250.315814" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="165.115084" y="240.612015" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="184.170931" y="230.908216" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="203.226778" y="221.204417" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="222.282625" y="211.500618" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="241.338473" y="201.796819" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="260.39432" y="192.09302" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="279.450167" y="182.38922" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="298.506014" y="172.685421" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="317.561862" y="162.981622" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="336.617709" y="153.277823" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="355.673556" y="143.574024" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="374.729403" y="133.870225" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="393.785251" y="124.166426" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="412.841098" y="114.462626" style="fill-opacity: 0; stroke: #440154"/>
+     <use xlink:href="#mecd52b7ca5" x="431.896945" y="104.758827" style="fill-opacity: 0; stroke: #440154"/>
+    </g>
+   </g>
+   <g id="line2d_16">
+    <defs>
+     <path id="me712094ef6" d="M -5.5 -0 
+L 5.5 5.5 
+L 5.5 -5.5 
+z
+" style="stroke: #31688e; stroke-linejoin: miter"/>
+    </defs>
+    <g clip-path="url(#pbd5adeac22)">
+     <use xlink:href="#me712094ef6" x="69.835847" y="269.723413" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="88.891695" y="260.019614" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="107.947542" y="250.315814" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="127.003389" y="240.612015" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="146.059236" y="230.908216" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="165.115084" y="221.204417" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="184.170931" y="211.500618" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="203.226778" y="201.796819" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="222.282625" y="192.09302" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="241.338473" y="182.38922" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="260.39432" y="172.685421" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="279.450167" y="162.981622" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="298.506014" y="153.277823" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="317.561862" y="143.574024" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="336.617709" y="133.870225" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="355.673556" y="124.166426" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="374.729403" y="114.462626" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="393.785251" y="104.758827" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="412.841098" y="95.055028" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     <use xlink:href="#me712094ef6" x="431.896945" y="85.351229" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+    </g>
+   </g>
+   <g id="line2d_17">
+    <defs>
+     <path id="m3997da41a5" d="M -7 7 
+L 7 7 
+L 7 -7 
+L -7 -7 
+z
+" style="stroke: #35b779; stroke-linejoin: miter"/>
+    </defs>
+    <g clip-path="url(#pbd5adeac22)">
+     <use xlink:href="#m3997da41a5" x="69.835847" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="88.891695" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="107.947542" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="127.003389" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="146.059236" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="165.115084" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="184.170931" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="203.226778" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="222.282625" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="241.338473" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="260.39432" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="279.450167" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="298.506014" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="317.561862" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="336.617709" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="355.673556" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="374.729403" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="393.785251" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="412.841098" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     <use xlink:href="#m3997da41a5" x="431.896945" y="269.723413" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+    </g>
+   </g>
+   <g id="patch_3">
+    <path d="M 50.78 299.32 
+L 50.78 75.16224 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_4">
+    <path d="M 450 299.32 
+L 450 75.16224 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_5">
+    <path d="M 50.78 299.32 
+L 450 299.32 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_6">
+    <path d="M 50.78 75.16224 
+L 450 75.16224 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="legend_1">
+    <g id="patch_7">
+     <path d="M 52.78 70.679085 
+L 448 70.679085 
+Q 450 70.679085 450 68.679085 
+L 450 12.869585 
+Q 450 10.869585 448 10.869585 
+L 52.78 10.869585 
+Q 50.78 10.869585 50.78 12.869585 
+L 50.78 68.679085 
+Q 50.78 70.679085 52.78 70.679085 
+z
+" style="fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter"/>
+    </g>
+    <g id="line2d_18">
+     <g>
+      <use xlink:href="#mecd52b7ca5" x="64.78" y="22.311335" style="fill-opacity: 0; stroke: #440154"/>
+     </g>
+    </g>
+    <g id="text_17">
+     <!-- Bank Conflicts -->
+     <g transform="translate(82.78 25.811335) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-42" d="M 1259 2228 
+L 1259 519 
+L 2272 519 
+Q 2781 519 3026 730 
+Q 3272 941 3272 1375 
+Q 3272 1813 3026 2020 
+Q 2781 2228 2272 2228 
+L 1259 2228 
+z
+M 1259 4147 
+L 1259 2741 
+L 2194 2741 
+Q 2656 2741 2882 2914 
+Q 3109 3088 3109 3444 
+Q 3109 3797 2882 3972 
+Q 2656 4147 2194 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 2241 4666 
+Q 2963 4666 3353 4366 
+Q 3744 4066 3744 3513 
+Q 3744 3084 3544 2831 
+Q 3344 2578 2956 2516 
+Q 3422 2416 3680 2098 
+Q 3938 1781 3938 1306 
+Q 3938 681 3513 340 
+Q 3088 0 2303 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6b" d="M 581 4863 
+L 1159 4863 
+L 1159 1991 
+L 2875 3500 
+L 3609 3500 
+L 1753 1863 
+L 3688 0 
+L 2938 0 
+L 1159 1709 
+L 1159 0 
+L 581 0 
+L 581 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6f" d="M 1959 3097 
+Q 1497 3097 1228 2736 
+Q 959 2375 959 1747 
+Q 959 1119 1226 758 
+Q 1494 397 1959 397 
+Q 2419 397 2687 759 
+Q 2956 1122 2956 1747 
+Q 2956 2369 2687 2733 
+Q 2419 3097 1959 3097 
+z
+M 1959 3584 
+Q 2709 3584 3137 3096 
+Q 3566 2609 3566 1747 
+Q 3566 888 3137 398 
+Q 2709 -91 1959 -91 
+Q 1206 -91 779 398 
+Q 353 888 353 1747 
+Q 353 2609 779 3096 
+Q 1206 3584 1959 3584 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-66" d="M 2375 4863 
+L 2375 4384 
+L 1825 4384 
+Q 1516 4384 1395 4259 
+Q 1275 4134 1275 3809 
+L 1275 3500 
+L 2222 3500 
+L 2222 3053 
+L 1275 3053 
+L 1275 0 
+L 697 0 
+L 697 3053 
+L 147 3053 
+L 147 3500 
+L 697 3500 
+L 697 3744 
+Q 697 4328 969 4595 
+Q 1241 4863 1831 4863 
+L 2375 4863 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-42"/>
+      <use xlink:href="#DejaVuSans-61" x="68.603516"/>
+      <use xlink:href="#DejaVuSans-6e" x="129.882812"/>
+      <use xlink:href="#DejaVuSans-6b" x="193.261719"/>
+      <use xlink:href="#DejaVuSans-20" x="251.171875"/>
+      <use xlink:href="#DejaVuSans-43" x="282.958984"/>
+      <use xlink:href="#DejaVuSans-6f" x="352.783203"/>
+      <use xlink:href="#DejaVuSans-6e" x="413.964844"/>
+      <use xlink:href="#DejaVuSans-66" x="477.34375"/>
+      <use xlink:href="#DejaVuSans-6c" x="512.548828"/>
+      <use xlink:href="#DejaVuSans-69" x="540.332031"/>
+      <use xlink:href="#DejaVuSans-63" x="568.115234"/>
+      <use xlink:href="#DejaVuSans-74" x="623.095703"/>
+      <use xlink:href="#DejaVuSans-73" x="662.304688"/>
+     </g>
+    </g>
+    <g id="line2d_19">
+     <g>
+      <use xlink:href="#me712094ef6" x="64.78" y="41.247835" style="fill-opacity: 0; stroke: #31688e; stroke-linejoin: miter"/>
+     </g>
+    </g>
+    <g id="text_18">
+     <!-- Index Accesses -->
+     <g transform="translate(82.78 44.747835) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-49" d="M 628 4666 
+L 1259 4666 
+L 1259 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-64" d="M 2906 2969 
+L 2906 4863 
+L 3481 4863 
+L 3481 0 
+L 2906 0 
+L 2906 525 
+Q 2725 213 2448 61 
+Q 2172 -91 1784 -91 
+Q 1150 -91 751 415 
+Q 353 922 353 1747 
+Q 353 2572 751 3078 
+Q 1150 3584 1784 3584 
+Q 2172 3584 2448 3432 
+Q 2725 3281 2906 2969 
+z
+M 947 1747 
+Q 947 1113 1208 752 
+Q 1469 391 1925 391 
+Q 2381 391 2643 752 
+Q 2906 1113 2906 1747 
+Q 2906 2381 2643 2742 
+Q 2381 3103 1925 3103 
+Q 1469 3103 1208 2742 
+Q 947 2381 947 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-78" d="M 3513 3500 
+L 2247 1797 
+L 3578 0 
+L 2900 0 
+L 1881 1375 
+L 863 0 
+L 184 0 
+L 1544 1831 
+L 300 3500 
+L 978 3500 
+L 1906 2253 
+L 2834 3500 
+L 3513 3500 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-41" d="M 2188 4044 
+L 1331 1722 
+L 3047 1722 
+L 2188 4044 
+z
+M 1831 4666 
+L 2547 4666 
+L 4325 0 
+L 3669 0 
+L 3244 1197 
+L 1141 1197 
+L 716 0 
+L 50 0 
+L 1831 4666 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-49"/>
+      <use xlink:href="#DejaVuSans-6e" x="29.492188"/>
+      <use xlink:href="#DejaVuSans-64" x="92.871094"/>
+      <use xlink:href="#DejaVuSans-65" x="156.347656"/>
+      <use xlink:href="#DejaVuSans-78" x="216.121094"/>
+      <use xlink:href="#DejaVuSans-20" x="275.300781"/>
+      <use xlink:href="#DejaVuSans-41" x="307.087891"/>
+      <use xlink:href="#DejaVuSans-63" x="373.746094"/>
+      <use xlink:href="#DejaVuSans-63" x="428.726562"/>
+      <use xlink:href="#DejaVuSans-65" x="483.707031"/>
+      <use xlink:href="#DejaVuSans-73" x="545.230469"/>
+      <use xlink:href="#DejaVuSans-73" x="597.330078"/>
+      <use xlink:href="#DejaVuSans-65" x="649.429688"/>
+      <use xlink:href="#DejaVuSans-73" x="710.953125"/>
+     </g>
+    </g>
+    <g id="line2d_20">
+     <g>
+      <use xlink:href="#m3997da41a5" x="64.78" y="60.184335" style="fill-opacity: 0; stroke: #35b779; stroke-linejoin: miter"/>
+     </g>
+    </g>
+    <g id="text_19">
+     <!-- Index Accesses - Bank Conflicts -->
+     <g transform="translate(82.78 63.684335) scale(0.144 -0.144)">
+      <defs>
+       <path id="DejaVuSans-2d" d="M 313 2009 
+L 1997 2009 
+L 1997 1497 
+L 313 1497 
+L 313 2009 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-49"/>
+      <use xlink:href="#DejaVuSans-6e" x="29.492188"/>
+      <use xlink:href="#DejaVuSans-64" x="92.871094"/>
+      <use xlink:href="#DejaVuSans-65" x="156.347656"/>
+      <use xlink:href="#DejaVuSans-78" x="216.121094"/>
+      <use xlink:href="#DejaVuSans-20" x="275.300781"/>
+      <use xlink:href="#DejaVuSans-41" x="307.087891"/>
+      <use xlink:href="#DejaVuSans-63" x="373.746094"/>
+      <use xlink:href="#DejaVuSans-63" x="428.726562"/>
+      <use xlink:href="#DejaVuSans-65" x="483.707031"/>
+      <use xlink:href="#DejaVuSans-73" x="545.230469"/>
+      <use xlink:href="#DejaVuSans-73" x="597.330078"/>
+      <use xlink:href="#DejaVuSans-65" x="649.429688"/>
+      <use xlink:href="#DejaVuSans-73" x="710.953125"/>
+      <use xlink:href="#DejaVuSans-20" x="763.052734"/>
+      <use xlink:href="#DejaVuSans-2d" x="794.839844"/>
+      <use xlink:href="#DejaVuSans-20" x="830.923828"/>
+      <use xlink:href="#DejaVuSans-42" x="862.710938"/>
+      <use xlink:href="#DejaVuSans-61" x="931.314453"/>
+      <use xlink:href="#DejaVuSans-6e" x="992.59375"/>
+      <use xlink:href="#DejaVuSans-6b" x="1055.972656"/>
+      <use xlink:href="#DejaVuSans-20" x="1113.882812"/>
+      <use xlink:href="#DejaVuSans-43" x="1145.669922"/>
+      <use xlink:href="#DejaVuSans-6f" x="1215.494141"/>
+      <use xlink:href="#DejaVuSans-6e" x="1276.675781"/>
+      <use xlink:href="#DejaVuSans-66" x="1340.054688"/>
+      <use xlink:href="#DejaVuSans-6c" x="1375.259766"/>
+      <use xlink:href="#DejaVuSans-69" x="1403.042969"/>
+      <use xlink:href="#DejaVuSans-63" x="1430.826172"/>
+      <use xlink:href="#DejaVuSans-74" x="1485.806641"/>
+      <use xlink:href="#DejaVuSans-73" x="1525.015625"/>
+     </g>
+    </g>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="pbd5adeac22">
+   <rect x="50.78" y="75.16224" width="399.22" height="224.15776"/>
+  </clipPath>
+ </defs>
+</svg>
diff --git a/projects/rocprofiler-compute/docs/data/unused/L1_l2_transactions_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L1_l2_transactions_per_channel.png
new file mode 100644
index 0000000000..7b839ab0fe
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L1_l2_transactions_per_channel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_latencies_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_latencies_per_channel.png
new file mode 100644
index 0000000000..a0b3471974
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_latencies_per_channel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_stalls_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_stalls_per_channel.png
new file mode 100644
index 0000000000..ac1c5dffb1
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_stalls_per_channel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_stalls_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_stalls_per_channel.png
new file mode 100644
index 0000000000..d5a1c2c072
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_stalls_per_channel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_starvation_per_channel.png b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_starvation_per_channel.png
new file mode 100644
index 0000000000..49d584621d
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/L2_ea_write_starvation_per_channel.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/Memory_latencies.png b/projects/rocprofiler-compute/docs/data/unused/Memory_latencies.png
new file mode 100644
index 0000000000..3b97d72e0d
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/Memory_latencies.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/Roofline_analysis.png b/projects/rocprofiler-compute/docs/data/unused/Roofline_analysis.png
new file mode 100644
index 0000000000..36efd2ea77
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/Roofline_analysis.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/Top_bottleneck_kernels.png b/projects/rocprofiler-compute/docs/data/unused/Top_bottleneck_kernels.png
new file mode 100644
index 0000000000..17b8ef7da2
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/Top_bottleneck_kernels.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/fig_level_counter.png b/projects/rocprofiler-compute/docs/data/unused/fig_level_counter.png
new file mode 100644
index 0000000000..fa50539a0c
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/fig_level_counter.png differ
diff --git a/projects/rocprofiler-compute/docs/data/unused/omniperf_architecture.png b/projects/rocprofiler-compute/docs/data/unused/omniperf_architecture.png
new file mode 100644
index 0000000000..966ac2d608
Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/unused/omniperf_architecture.png differ
diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst
new file mode 100644
index 0000000000..f76e3970fc
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst
@@ -0,0 +1,378 @@
+.. meta::
+   :description: Omniperf analysis: CLI analysis
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, command line, analyze, filtering, metrics, baseline, comparison
+
+************
+CLI analysis
+************
+
+This section provides an overview of Omniperf's CLI analysis features.
+
+* :ref:`Derived metrics <cli-list-metrics>`: All of Omniperf's built-in metrics.
+
+* :ref:`Baseline comparison <analysis-baseline-comparison>`: Compare multiple
+  runs in a side-by-side manner.
+
+* :ref:`Metric customization <cli-analysis-options>`: Isolate a subset of
+  built-in metrics or build your own profiling configuration.
+
+* :ref:`Filtering <cli-analysis-options>`: Home in on a particular kernel,
+  GPU ID, or dispatch ID via post-process filtering.
+
+Run ``omniperf analyze -h`` for more details.
+
+.. _cli-walkthrough:
+
+Walkthrough
+===========
+
+1. To begin, generate a high-level analysis report using Omniperf's ``-b`` (or ``--block``) flag. 
+
+   .. code-block:: shell
+
+      $ omniperf analyze -p workloads/vcopy/MI200/ -b 2
+
+        ___                  _                  __ 
+       / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+      | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+      | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+       \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                              |_|                  
+
+      Analysis mode = cli
+      [analysis] deriving Omniperf metrics...
+
+      --------------------------------------------------------------------------------
+      0. Top Stats
+      0.1 Top Kernels
+      ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+      │    │ Kernel_Name                              │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+      ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+      │  0 │ vecCopy(double*, double*, double*, int,  │    1.00 │  20160.00 │   20160.00 │     20160.00 │ 100.00 │
+      │    │ int) [clone .kd]                         │         │           │            │              │        │
+      ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+      0.2 Dispatch List
+      ╒════╤═══════════════╤══════════════════════════════════════════════════════════╤══════════╕
+      │    │   Dispatch_ID │ Kernel_Name                                              │   GPU_ID │
+      ╞════╪═══════════════╪══════════════════════════════════════════════════════════╪══════════╡
+      │  0 │             0 │ vecCopy(double*, double*, double*, int, int) [clone .kd] │        0 │
+      ╘════╧═══════════════╧══════════════════════════════════════════════════════════╧══════════╛
+
+
+      --------------------------------------------------------------------------------
+      2. System Speed-of-Light
+      2.1 Speed-of-Light
+      ╒═════════════╤═══════════════════════════╤═════════╤══════════════════╤══════════╤═══════════════╕
+      │ Metric_ID   │ Metric                    │ Avg     │ Unit             │ Peak     │ Pct of Peak   │
+      ╞═════════════╪═══════════════════════════╪═════════╪══════════════════╪══════════╪═══════════════╡
+      │ 2.1.0       │ VALU FLOPs                │ 0.0     │ Gflop            │ 22630.4  │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.1       │ VALU IOPs                 │ 364.09  │ Giop             │ 22630.4  │ 1.61          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.2       │ MFMA FLOPs (BF16)         │ 0.0     │ Gflop            │ 181043.2 │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.3       │ MFMA FLOPs (F16)          │ 0.0     │ Gflop            │ 181043.2 │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.4       │ MFMA FLOPs (F32)          │ 0.0     │ Gflop            │ 45260.8  │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.5       │ MFMA FLOPs (F64)          │ 0.0     │ Gflop            │ 45260.8  │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.6       │ MFMA IOPs (Int8)          │ 0.0     │ Giop             │ 181043.2 │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.7       │ Active CUs                │ 70.0    │ Cus              │ 104.0    │ 67.31         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.8       │ SALU Utilization          │ 3.78    │ Pct              │ 100.0    │ 3.78          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.9       │ VALU Utilization          │ 5.4     │ Pct              │ 100.0    │ 5.4           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.10      │ MFMA Utilization          │ 0.0     │ Pct              │ 100.0    │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.11      │ VMEM Utilization          │ 1.08    │ Pct              │ 100.0    │ 1.08          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.12      │ Branch Utilization        │ 1.08    │ Pct              │ 100.0    │ 1.08          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.13      │ VALU Active Threads       │ 64.0    │ Threads          │ 64.0     │ 100.0         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.14      │ IPC                       │ 0.21    │ Instr/cycle      │ 5.0      │ 4.13          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.15      │ Wavefront Occupancy       │ 2488.86 │ Wavefronts       │ 3328.0   │ 74.79         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.16      │ Theoretical LDS Bandwidth │ 0.0     │ Gb/s             │ 22630.4  │ 0.0           │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.17      │ LDS Bank Conflicts/Access │         │ Conflicts/access │ 32.0     │               │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.18      │ vL1D Cache Hit Rate       │ 50.0    │ Pct              │ 100.0    │ 50.0          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.19      │ vL1D Cache BW             │ 1664.41 │ Gb/s             │ 11315.2  │ 14.71         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.20      │ L2 Cache Hit Rate         │ 35.74   │ Pct              │ 100.0    │ 35.74         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.21      │ L2 Cache BW               │ 1296.31 │ Gb/s             │ 3481.6   │ 37.23         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.22      │ L2-Fabric Read BW         │ 416.52  │ Gb/s             │ 1638.4   │ 25.42         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.23      │ L2-Fabric Write BW        │ 292.3   │ Gb/s             │ 1638.4   │ 17.84         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.24      │ L2-Fabric Read Latency    │ 262.85  │ Cycles           │          │               │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.25      │ L2-Fabric Write Latency   │ 307.4   │ Cycles           │          │               │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.26      │ sL1D Cache Hit Rate       │ 99.82   │ Pct              │ 100.0    │ 99.82         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.27      │ sL1D Cache BW             │ 208.05  │ Gb/s             │ 6092.8   │ 3.41          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.28      │ L1I Hit Rate              │ 99.91   │ Pct              │ 100.0    │ 99.91         │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.29      │ L1I BW                    │ 208.05  │ Gb/s             │ 6092.8   │ 3.41          │
+      ├─────────────┼───────────────────────────┼─────────┼──────────────────┼──────────┼───────────────┤
+      │ 2.1.30      │ L1I Fetch Latency         │ 20.86   │ Cycles           │          │               │
+      ╘═════════════╧═══════════════════════════╧═════════╧══════════════════╧══════════╧═══════════════╛
+
+      ...
+
+.. _cli-list-metrics:
+
+2. Use ``--list-metrics`` to generate a list of available metrics for inspection.
+
+   .. code-block:: shell
+
+      $ omniperf analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a
+
+        ___                  _                  __ 
+       / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+      | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+      | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+       \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                              |_|                  
+
+      Analysis mode = cli
+      [analysis] deriving Omniperf metrics...
+      0 -> Top Stats
+      1 -> System Info
+      2 -> System Speed-of-Light
+              2.1 -> Speed-of-Light
+                      2.1.0 -> VALU FLOPs
+                      2.1.1 -> VALU IOPs
+                      2.1.2 -> MFMA FLOPs (BF16)
+                      2.1.3 -> MFMA FLOPs (F16)
+                      2.1.4 -> MFMA FLOPs (F32)
+                      2.1.5 -> MFMA FLOPs (F64)
+                      2.1.6 -> MFMA IOPs (Int8)
+                      2.1.7 -> Active CUs
+                      2.1.8 -> SALU Utilization
+                      2.1.9 -> VALU Utilization
+                      2.1.10 -> MFMA Utilization
+                      2.1.11 -> VMEM Utilization
+                      2.1.12 -> Branch Utilization
+                      2.1.13 -> VALU Active Threads
+                      2.1.14 -> IPC
+                      2.1.15 -> Wavefront Occupancy
+                      2.1.16 -> Theoretical LDS Bandwidth
+                      2.1.17 -> LDS Bank Conflicts/Access
+                      2.1.18 -> vL1D Cache Hit Rate
+                      2.1.19 -> vL1D Cache BW
+                      2.1.20 -> L2 Cache Hit Rate
+                      2.1.21 -> L2 Cache BW
+                      2.1.22 -> L2-Fabric Read BW
+                      2.1.23 -> L2-Fabric Write BW
+                      2.1.24 -> L2-Fabric Read Latency
+                      2.1.25 -> L2-Fabric Write Latency
+                      2.1.26 -> sL1D Cache Hit Rate
+                      2.1.27 -> sL1D Cache BW
+                      2.1.28 -> L1I Hit Rate
+                      2.1.29 -> L1I BW
+                      2.1.30 -> L1I Fetch Latency
+      ...
+
+3. Choose your own customized subset of metrics with the ``-b`` (or ``--block``)
+   option. Or, build your own configuration following
+   `config_template <https://github.com/ROCm/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml>`_.
+   The following snippet shows how to generate a report containing only metric 2
+   (:doc:`System Speed-of-Light </conceptual/system-speed-of-light>`).
+
+   .. code-block:: shell
+
+      $ omniperf analyze -p workloads/vcopy/MI200/ -b 2
+
+      --------
+      Analyze
+      --------
+
+      --------------------------------------------------------------------------------
+      0. Top Stat
+      ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+      │    │ KernelName                               │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+      ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+      │  0 │ vecCopy(double*, double*, double*, int,  │       1 │  20000.00 │   20000.00 │     20000.00 │ 100.00 │
+      │    │ int) [clone .kd]                         │         │           │            │              │        │
+      ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+      --------------------------------------------------------------------------------
+      2. System Speed-of-Light
+      ╒═════════╤═══════════════════════════╤═══════════════════════╤══════════════════╤════════════════════╤════════════════════════╕
+      │ Index   │ Metric                    │ Value                 │ Unit             │ Peak               │ PoP                    │
+      ╞═════════╪═══════════════════════════╪═══════════════════════╪══════════════════╪════════════════════╪════════════════════════╡
+      │ 2.1.0   │ VALU FLOPs                │ 0.0                   │ Gflop            │ 22630.4            │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.1   │ VALU IOPs                 │ 367.0016              │ Giop             │ 22630.4            │ 1.6217194570135745     │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.2   │ MFMA FLOPs (BF16)         │ 0.0                   │ Gflop            │ 90521.6            │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.3   │ MFMA FLOPs (F16)          │ 0.0                   │ Gflop            │ 181043.2           │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.4   │ MFMA FLOPs (F32)          │ 0.0                   │ Gflop            │ 45260.8            │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.5   │ MFMA FLOPs (F64)          │ 0.0                   │ Gflop            │ 45260.8            │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.6   │ MFMA IOPs (Int8)          │ 0.0                   │ Giop             │ 181043.2           │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.7   │ Active CUs                │ 74                    │ Cus              │ 104                │ 71.15384615384616      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.8   │ SALU Util                 │ 4.016057506716307     │ Pct              │ 100                │ 4.016057506716307      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.9   │ VALU Util                 │ 5.737225009594725     │ Pct              │ 100                │ 5.737225009594725      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.10  │ MFMA Util                 │ 0.0                   │ Pct              │ 100                │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.11  │ VALU Active Threads/Wave  │ 64.0                  │ Threads          │ 64                 │ 100.0                  │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.12  │ IPC - Issue               │ 1.0                   │ Instr/cycle      │ 5                  │ 20.0                   │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.13  │ LDS BW                    │ 0.0                   │ Gb/sec           │ 22630.4            │ 0.0                    │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.14  │ LDS Bank Conflict         │                       │ Conflicts/access │ 32                 │                        │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.15  │ Instr Cache Hit Rate      │ 99.91306912556854     │ Pct              │ 100                │ 99.91306912556854      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.16  │ Instr Cache BW            │ 209.7152              │ Gb/s             │ 6092.8             │ 3.442016806722689      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.17  │ Scalar L1D Cache Hit Rate │ 99.81986908342313     │ Pct              │ 100                │ 99.81986908342313      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.18  │ Scalar L1D Cache BW       │ 209.7152              │ Gb/s             │ 6092.8             │ 3.442016806722689      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.19  │ Vector L1D Cache Hit Rate │ 50.0                  │ Pct              │ 100                │ 50.0                   │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.20  │ Vector L1D Cache BW       │ 1677.7216             │ Gb/s             │ 11315.199999999999 │ 14.82714932126697      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.21  │ L2 Cache Hit Rate         │ 35.55067615693325     │ Pct              │ 100                │ 35.55067615693325      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.22  │ L2-Fabric Read BW         │ 419.8496              │ Gb/s             │ 1638.4             │ 25.6255859375          │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.23  │ L2-Fabric Write BW        │ 293.9456              │ Gb/s             │ 1638.4             │ 17.941015625           │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.24  │ L2-Fabric Read Latency    │ 256.6482321288385     │ Cycles           │                    │                        │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.25  │ L2-Fabric Write Latency   │ 317.2264255699014     │ Cycles           │                    │                        │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.26  │ Wave Occupancy            │ 1821.723057333852     │ Wavefronts       │ 3328               │ 54.73927455931046      │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.27  │ Instr Fetch BW            │ 4.174722306564298e-08 │ Gb/s             │ 3046.4             │ 1.3703789084047721e-09 │
+      ├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
+      │ 2.1.28  │ Instr Fetch Latency       │ 21.729248046875       │ Cycles           │                    │                        │
+      ╘═════════╧═══════════════════════════╧═══════════════════════╧══════════════════╧════════════════════╧════════════════════════╛
+
+   .. note::
+
+      Some cells may be blank, indicating a missing or unavailable hardware
+      counter or a NULL value.
+
+4. Optimize the application, iterate, and re-profile to inspect performance
+   changes.
+
+5. Redo a comprehensive analysis with Omniperf CLI at any optimization
+   milestone.
+
+.. _cli-analysis-options:
+
+More analysis options
+=====================
+
+Single run
+  .. code-block:: shell
+
+     $ omniperf analyze -p workloads/vcopy/MI200/
+
+List top kernels and dispatches
+  .. code-block:: shell
+
+     $ omniperf analyze -p workloads/vcopy/MI200/  --list-stats
+
+List metrics
+  .. code-block:: shell
+
+     $ omniperf analyze -p workloads/vcopy/MI200/  --list-metrics gfx90a
+
+Show System Speed-of-Light and CS_Busy blocks only
+  .. code-block:: shell
+
+     $ omniperf analyze -p workloads/vcopy/MI200/  -b 2  5.1.0
+
+.. note::
+
+   You can filter a single metric or the whole hardware component by its ID. In
+   this case, ``2`` is the ID for System Speed-of-Light and ``5.1.0`` is the ID
+   for the GPU Busy Cycles metric.
+
+Filter kernels
+  First, list the top kernels in your application using ``--list-stats``.
+
+  .. code-block:: shell
+
+     $ omniperf analyze -p workloads/vcopy/MI200/ --list-stats
+
+     Analysis mode = cli
+     [analysis] deriving Omniperf metrics...
+
+     --------------------------------------------------------------------------------
+     Detected Kernels (sorted descending by duration)
+     ╒════╤══════════════════════════════════════════════╕
+     │    │ Kernel_Name                                  │
+     ╞════╪══════════════════════════════════════════════╡
+     │  0 │ vecCopy(double*, double*, double*, int, int) │
+     ╘════╧══════════════════════════════════════════════╛
+
+     --------------------------------------------------------------------------------
+     Dispatch list
+     ╒════╤═══════════════╤══════════════════════════════════════════════╤══════════╕
+     │    │   Dispatch_ID │ Kernel_Name                                  │   GPU_ID │
+     ╞════╪═══════════════╪══════════════════════════════════════════════╪══════════╡
+     │  0 │             0 │ vecCopy(double*, double*, double*, int, int) │        0 │
+     ╘════╧═══════════════╧══════════════════════════════════════════════╧══════════╛
+
+  Second, select the index of the kernel you would like to filter; for example,
+  ``vecCopy(double*, double*, double*, int, int) [clone .kd]`` at index ``0``.
+  Then, use this index to apply the filter via ``-k`` or ``--kernels``.
+
+  .. code-block:: shell
+
+     $ omniperf analyze -p workloads/vcopy/MI200/ -k 0
+
+     Analysis mode = cli
+     [analysis] deriving Omniperf metrics...
+
+     --------------------------------------------------------------------------------
+     0. Top Stats
+     0.1 Top Kernels
+     ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╤═════╕
+     │    │ Kernel_Name                              │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │ S   │
+     ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╪═════╡
+     │  0 │ vecCopy(double*, double*, double*, int,  │    1.00 │  18560.00 │   18560.00 │     18560.00 │ 100.00 │ *   │
+     │    │ int)                                     │         │           │            │              │        │     │
+     ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╧═════╛
+     ...
+
+  You should see your filtered kernels indicated by an asterisk in the **Top
+  Stats** table.
+
+
+Baseline comparison
+  .. code-block:: shell
+
+     omniperf analyze -p workload1/path/  -p workload2/path/
+
+  OR
+
+  .. code-block:: shell
+
+     omniperf analyze -p workload1/path/ -k 0  -p workload2/path/ -k 1
diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/grafana-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/grafana-gui.rst
new file mode 100644
index 0000000000..d5474aefbd
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/how-to/analyze/grafana-gui.rst
@@ -0,0 +1,1071 @@
+.. meta::
+   :description: Omniperf analysis: Grafana GUI
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, Grafana, panels, GUI, import
+
+********************
+Grafana GUI analysis
+********************
+
+Find setup instructions in :doc:`../../install/grafana-setup`.
+
+The Omniperf Grafana analysis dashboard GUI supports the following features to
+facilitate MI accelerator performance profiling and analysis:
+
+* System and hardware component (hardware block)
+
+* Speed-of-Light (SOL)
+
+* Multiple normalization options
+
+* Baseline comparisons
+
+* Regex-based dispatch ID filtering
+
+* Roofline analysis
+
+* Detailed performance counters and metrics per hardware component, such as:
+
+  * Command Processor - Fetch (CPF) / Command Processor - Controller (CPC)
+
+  * Workgroup Manager (SPI)
+
+  * Shader Sequencer (SQ)
+
+  * Shader Sequencer Controller (SQC)
+
+  * L1 Address Processing Unit, a.k.a. Texture Addresser (TA) / L1 Backend Data
+    Processing Unit, a.k.a. Texture Data (TD)
+
+  * L1 Cache (TCP)
+
+  * L2 Cache (TCC) (both aggregated and per-channel perf info)
+
+See the full list of :ref:`Omniperf's analysis panels <panels>`.
+
+.. _analysis-sol:
+
+Speed-of-Light
+--------------
+
+Speed-of-Light panels are provided at both the system and per hardware component
+level to help diagnose performance bottlenecks. The performance numbers of the
+workload under testing are compared to the theoretical maximum, such as floating
+point operations, bandwidth, cache hit rate, etc., to indicate the available
+room to further utilize the hardware capability.
+
+.. _analysis-normalizations:
+
+Normalizations
+--------------
+
+Multiple performance number normalizations are provided to allow performance
+inspection within both hardware and software contexts. The following
+normalizations are available:
+
+* ``per_wave``
+
+* ``per_cycle``
+
+* ``per_kernel``
+
+* ``per_second``
+
+See :ref:`normalization-units` to learn more about Omniperf normalizations.
+
+.. _analysis-baseline-comparison:
+
+Baseline comparison
+-------------------
+
+Omniperf enables baseline comparison to allow checking A/B effects. Currently,
+baseline comparison is limited to the same :ref:`SoC <def-soc>`.
+Cross-comparison between SoCs is in development.
+
+For both the Current Workload and the Baseline Workload, you can independently
+set up the following filters to allow fine-grained comparisons:
+
+* Workload Name
+
+* GPU ID filtering (multi-selection)
+
+* Kernel Name filtering (multi-selection)
+
+* Dispatch ID filtering (regex filtering)
+
+* Omniperf Panels (multi-selection)
+
+.. _analysis-regex-dispatch-id:
+
+Regex-based dispatch ID filtering
+---------------------------------
+
+Omniperf supports dispatch ID filtering based on regular expressions (regex), a
+standard Linux string-matching syntax, so that you can flexibly choose the
+kernel invocations to inspect.
+
+For example, to inspect Dispatch Range from 17 to 48, inclusive, the
+corresponding regex is: ``(1[7-9]|[23]\d|4[0-8])``.
+
+.. tip::
+
+   Try `Regex Numeric Range Generator <https://3widgets.com/>`_ for help
+   generating typical number ranges.
+
+.. _analysis-incremental-profiling:
+
+Incremental profiling
+---------------------
+
+Omniperf supports incremental profiling to speed up performance analysis.
+
+Refer to the :ref:`profiling-hw-component-filtering` section for this command.
+
+By default, the entire application is profiled to collect performance counters
+for all hardware blocks, giving a complete view of where the workload stands in
+terms of performance optimization opportunities and bottlenecks.
+
+You can choose to focus on only a few hardware components -- for example L1
+cache or LDS -- to closely check the effect of software optimizations, without
+performing application replay for *all* other hardware components. This saves
+a lot of compute time. In addition, prior profiling results for other hardware
+components are not overwritten; instead, they can be merged during the import to
+piece together an overall profile of the system.
+
+.. _analysis-color-coding:
+
+Color coding
+------------
+
+Uniform color coding applies to most visualizations -- including bar graphs,
+tables, and diagrams -- for easy inspection. As a rule of thumb, *yellow* means
+over 50%, while *red* means over 90%.
+
+Global variables and configurations
+-----------------------------------
+
+.. image:: ../../data/analyze/global_variables.png
+   :align: center
+   :alt: Omniperf global variables and configurations
+   :width: 800
+
+.. _grafana-gui-import:
+
+Grafana GUI import
+------------------
+
+The Omniperf database ``--import`` option imports the raw profiling data to
+Grafana's backend MongoDB database. This step is only required for Grafana
+GUI-based performance analysis.
+
+Default username and password for MongoDB (to be used in database mode) are as
+follows:
+
+* **Username**: ``temp``
+
+* **Password**: ``temp123``
+
+Each workload is imported to a separate database with the following naming
+convention:
+
+.. code-block:: shell
+
+    omniperf_<team>_<database>_<soc>
+
+For example:
+
+.. code-block:: shell
+
+   omniperf_asw_vcopy_mi200
+
+When using :ref:`database mode <modes-database>`, be sure to tailor the
+connection options to the machine hosting your
+:doc:`server-side instance </install/grafana-setup>`. Below is the sample
+command to import the *vcopy* profiling data, assuming our host machine is
+called ``dummybox``.
+
+.. _grafana-gui-remove:
+
+.. code-block:: shell-session
+
+   $ omniperf database --help
+   usage:
+
+   omniperf database <interaction type> [connection options]
+
+
+
+   -------------------------------------------------------------------------------
+
+   Examples:
+
+           omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/
+
+           omniperf database --remove -H pavii1 -u temp -w omniperf_asw_sample_mi200
+
+   -------------------------------------------------------------------------------
+
+
+
+   Help:
+     -h, --help         show this help message and exit
+
+   General Options:
+     -v, --version      show program's version number and exit
+     -V, --verbose      Increase output verbosity (use multiple times for higher levels)
+     -s, --specs        Print system specs.
+
+   Interaction Type:
+     -i, --import                                  Import workload to Omniperf DB
+     -r, --remove                                  Remove a workload from Omniperf DB
+
+   Connection Options:
+     -H , --host                                   Name or IP address of the server host.
+     -P , --port                                   TCP/IP Port. (DEFAULT: 27018)
+     -u , --username                               Username for authentication.
+     -p , --password                               The user's password. (will be requested later if it's not set)
+     -t , --team                                   Specify Team prefix.
+     -w , --workload                               Specify name of workload (to remove) or path to workload (to import)
+     --kernel-verbose              Specify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)
+
+
+Omniperf import for vcopy:
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: shell
+
+   $ omniperf database --import -H dummybox -u temp -t asw -w workloads/vcopy/mi200/
+
+     ___                  _                  __
+    / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+   | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_
+   | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+    \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|
+                           |_|
+
+
+   Pulling data from  /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   The directory exists
+   Found sysinfo file
+   KernelName shortening enabled
+   Kernel name verbose level: 2
+   Password:
+   Password received
+   -- Conversion & Upload in Progress --
+     0%|                                                                                                                                                                                                             | 0/11 [00:00<?, ?it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_IFETCH_LEVEL.csv
+     9%|█████████████████▉                                                                                                                                                                                   | 1/11 [00:00<00:01,  8.53it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/pmc_perf.csv
+    18%|███████████████████████████████████▊                                                                                                                                                                 | 2/11 [00:00<00:01,  6.99it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_INST_LEVEL_SMEM.csv
+    27%|█████████████████████████████████████████████████████▋                                                                                                                                               | 3/11 [00:00<00:01,  7.90it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_LEVEL_WAVES.csv
+    36%|███████████████████████████████████████████████████████████████████████▋                                                                                                                             | 4/11 [00:00<00:00,  8.56it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_INST_LEVEL_LDS.csv
+    45%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                           | 5/11 [00:00<00:00,  9.00it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_INST_LEVEL_VMEM.csv
+    55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                         | 6/11 [00:00<00:00,  9.24it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/sysinfo.csv
+    64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 7/11 [00:00<00:00,  9.37it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/roofline.csv
+    82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 9/11 [00:00<00:00, 12.60it/s]/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/timestamps.csv
+   100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 11.05it/s]
+   9 collections added.
+   Workload name uploaded
+   -- Complete! --
+
+.. _panels:
+
+Omniperf panels
+---------------
+
+There are currently 18 main panel categories available for analyzing the compute
+workload performance. Each category contains several panels for close inspection
+of the system performance.
+
+- :ref:`Kernel Statistics <grafana-panel-kernel-stats>`
+
+  - Kernel time histogram
+
+  - Top ten bottleneck kernels
+
+- :ref:`System Speed-of-Light <grafana-panel-system-sol>`
+
+  - Speed-of-Light
+
+  - System Info table
+
+- :ref:`Memory Chart Analysis <grafana-panel-memory-chart-analysis>`
+
+- :ref:`Roofline Analysis <grafana-panel-roofline-analysis>`
+
+  - FP32/FP64
+
+  - FP16/INT8
+
+- :ref:`Command Processor <grafana-panel-cp>`
+
+  - Command Processor - Fetch (CPF)
+
+  - Command Processor - Controller (CPC)
+
+- :ref:`Workgroup Manager or Shader Processor Input (SPI) <grafana-panel-spi>`
+
+  - SPI Stats
+
+  - SPI Resource Allocations
+
+- :ref:`Wavefront Launch <grafana-panel-wavefront>`
+
+  - Wavefront Launch Stats
+
+  - Wavefront runtime stats
+
+  - per-SE Wavefront Scheduling performance
+
+- :ref:`Wavefront Lifetime <grafana-panel-wavefront>`
+
+  - Wavefront lifetime breakdown
+
+  - per-SE wavefront life (average)
+
+  - per-SE wavefront life (histogram)
+
+- :ref:`Wavefront Occupancy <grafana-panel-wavefront>`
+
+  - per-SE wavefront occupancy
+
+  - per-CU wavefront occupancy
+
+- :ref:`Compute Unit - Instruction Mix <grafana-panel-cu-instruction-mix>`
+
+  - per-wave Instruction mix
+
+  - per-wave VALU Arithmetic instruction mix
+
+  - per-wave MFMA Arithmetic instruction mix
+
+- :ref:`Compute Unit - Compute Pipeline <grafana-panel-cu-compute-pipeline>`
+
+  - Speed-of-Light: Compute Pipeline
+
+  - Arithmetic OPs count
+
+  - Compute pipeline stats
+
+  - Memory latencies
+
+- :ref:`Local Data Share (LDS) <grafana-panel-lds>`
+
+  - Speed-of-Light: LDS
+
+  - LDS stats
+
+- :ref:`Instruction Cache <grafana-panel-instruction-cache>`
+
+  - Speed-of-Light: Instruction Cache
+
+  - Instruction Cache Accesses
+
+- Constant Cache
+
+  - Speed-of-Light: Constant Cache
+
+  - Constant Cache Accesses
+
+  - Constant Cache - L2 Interface stats
+
+- :ref:`Texture Addresser and Texture Data <grafana-panel-ta>`
+
+  - Texture Addresser (TA)
+
+  - Texture Data (TD)
+
+- L1 Cache
+
+  - Speed-of-Light: L1 Cache
+
+  - L1 Cache Accesses
+
+  - L1 Cache Stalls
+
+  - L1 - L2 Transactions
+
+  - L1 - UTCL1 Interface stats
+
+- :ref:`L2 Cache <grafana-panel-l2-cache>`
+
+  - Speed-of-Light: L2 Cache
+
+  - L2 Cache Accesses
+
+  - L2 - EA Transactions
+
+  - L2 - EA Stalls
+
+- :ref:`L2 Cache Per Channel Performance <grafana-panel-l2-cache-per-channel>`
+
+  - Per-channel L2 Hit rate
+
+  - Per-channel L1-L2 Read requests
+
+  - Per-channel L1-L2 Write Requests
+
+  - Per-channel L1-L2 Atomic Requests
+
+  - Per-channel L2-EA Read requests
+
+  - Per-channel L2-EA Write requests
+
+  - Per-channel L2-EA Atomic requests
+
+  - Per-channel L2-EA Read latency
+
+  - Per-channel L2-EA Write latency
+
+  - Per-channel L2-EA Atomic latency
+
+  - Per-channel L2-EA Read stall (I/O, GMI, HBM)
+
+  - Per-channel L2-EA Write stall (I/O, GMI, HBM, Starve)
+
+Most panels are designed around a specific hardware component block to
+thoroughly understand its behavior. Additional panels, including custom panels,
+can also be added to aid the performance analysis.
+
+.. _grafana-panel-sys-info:
+
+System Info
+^^^^^^^^^^^
+
+.. figure:: ../../data/analyze/grafana/system-info_panel.png
+   :align: center
+   :alt: System details logged from the host machine
+   :width: 800
+
+   System details logged from the host machine.
+
+.. _grafana-panel-kernel-stats:
+
+Kernel Statistics
+^^^^^^^^^^^^^^^^^
+
+Kernel Time Histogram
++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/Kernel_time_histogram.png
+   :align: center
+   :alt: Kernel time histogram panel in Omniperf Grafana
+   :width: 800
+
+   Mapping application kernel launches to execution duration.
+
+Top Bottleneck Kernels
+++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/top-stat_panel.png
+   :align: center
+   :alt: Top bottleneck kernels panel in Omniperf Grafana
+   :width: 800
+
+   Top N kernels and relevant statistics. Sorted by total duration.
+
+Top Bottleneck Dispatches
++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/Top_bottleneck_dispatches.png
+   :align: center
+   :alt: Top bottleneck dispatches panel in Omniperf Grafana
+   :width: 800
+
+   Top N kernel dispatches and relevant statistics. Sorted by total duration.
+
+Current and Baseline Dispatch IDs (Filtered)
+++++++++++++++++++++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/Current_and_baseline_dispatch_ids.png
+   :align: center
+   :alt: Current and baseline dispatch IDs panel in Omniperf Grafana
+   :width: 800
+
+   List of all kernel dispatches.
+
+.. _grafana-panel-system-sol:
+
+System Speed-of-Light
+^^^^^^^^^^^^^^^^^^^^^
+
+.. figure:: ../../data/analyze/grafana/sol_panel.png
+   :align: center
+   :alt: System Speed-of-Light panel in Omniperf Grafana
+   :width: 800
+
+   Key metrics from various sections of Omniperf’s profiling report.
+
+.. tip::
+
+   See :doc:`/conceptual/system-speed-of-light` to learn about reported metrics.
+
+.. _grafana-panel-memory-chart-analysis:
+
+Memory Chart Analysis
+^^^^^^^^^^^^^^^^^^^^^
+
+.. note::
+
+   The Memory Chart Analysis supports multiple normalizations. Due to limited
+   space, all transactions, when normalized to ``per_sec``, default to unit of
+   billion transactions per second.
+
+.. figure:: ../../data/analyze/grafana/memory-chart_panel.png
+   :align: center
+   :alt: Memory Chart Analysis panel in Omniperf Grafana
+   :width: 800
+
+   A graphical representation of performance data for memory blocks on the GPU.
+
+
+.. _grafana-panel-roofline-analysis:
+
+Empirical Roofline Analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. figure:: ../../data/analyze/grafana/roofline_panel.png
+   :align: center
+   :alt: Roofline Analysis panel in Omniperf Grafana
+   :width: 800
+
+   Visualize achieved performance relative to a benchmarked peak performance.
+
+
+.. _grafana-panel-cp:
+
+Command Processor
+^^^^^^^^^^^^^^^^^
+
+.. tip::
+
+   See :doc:`/conceptual/command-processor` to learn about reported metrics.
+
+Command Processor Fetcher
++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cpc_panel.png
+   :align: center
+   :alt: Command Processor Fetcher panel in Omniperf Grafana
+   :width: 800
+
+   Fetches commands out of memory to hand them over to the Command Processor
+   Compute (CPC) for processing.
+
+Command Processor Compute
++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cpf_panel.png
+   :align: center
+   :alt: Command Processor Compute panel in Omniperf Grafana
+   :width: 800
+
+   The micro-controller running the command processing firmware that decodes the
+   fetched commands, and (for kernels) passes them to the Workgroup Managers
+   (SPIs) for scheduling.
+
+.. _grafana-panel-spi:
+
+Shader Processor Input (SPI)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. tip::
+
+   See :ref:`desc-spi` to learn about reported metrics.
+
+SPI Stats
++++++++++
+
+.. figure:: ../../data/analyze/grafana/spi-stats_panel.png
+   :align: center
+   :alt: SPI Stats panel in Omniperf Grafana
+   :width: 800
+
+..
+   TODO: Add caption after merge
+
+SPI Resource Allocation
++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/spi-resource-allocation_panel.png
+   :align: center
+   :alt: SPI Resource Allocation panel in Omniperf Grafana
+   :width: 800
+
+..
+   TODO: Add caption after merge
+
+.. _grafana-panel-wavefront:
+
+Wavefront
+^^^^^^^^^
+
+Wavefront Launch Stats
+++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/wavefront-launch-stats_panel.png
+   :align: center
+   :alt: Wavefront Launch Stats panel in Omniperf Grafana
+   :width: 800
+
+   General information about the kernel launch.
+
+.. tip::
+
+   See :ref:`wavefront-launch-stats` to learn about reported metrics.
+
+Wavefront Runtime Stats
++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/wavefront-runtime-stats_panel.png
+   :align: center
+   :alt: Wavefront Runtime Stats panel in Omniperf Grafana
+   :width: 800
+
+   High-level overview of the execution of wavefronts in a kernel.
+
+.. tip::
+
+   See :ref:`wavefront-runtime-stats` to learn about reported metrics.
+
+.. _grafana-panel-cu-instruction-mix:
+
+Compute Unit - Instruction Mix
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Instruction Mix
++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-inst-mix_panel.png
+   :align: center
+   :alt: Instruction Mix panel in Omniperf Grafana
+   :width: 800
+
+   Breakdown of the various types of instructions executed by the user’s kernel,
+   and which pipelines on the Compute Unit (CU) they were executed on.
+
+.. tip::
+
+   See :ref:`instruction-mix` to learn about reported metrics.
+
+VALU Arithmetic Instruction Mix
++++++++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-value-arith-instr-mix_panel.png
+   :align: center
+   :alt: VALU Arithmetic Instruction Mix panel in Omniperf Grafana
+   :width: 800
+
+   The various types of vector instructions that were issued to the vector
+   arithmetic logic unit (VALU).
+
+.. tip::
+
+   See :ref:`valu-arith-instruction-mix` to learn about reported metrics.
+
+MFMA Arithmetic Instruction Mix
++++++++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-mafma-arith-instr-mix_panel.png
+   :align: center
+   :alt: MFMA Arithmetic Instruction Mix panel in Omniperf Grafana
+   :width: 800
+
+   The types of Matrix Fused Multiply-Add (MFMA) instructions that were issued.
+
+.. tip::
+
+   See :ref:`mfma-instruction-mix` to learn about reported metrics.
+
+VMEM Arithmetic Instruction Mix
++++++++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-vmem-instr-mix_panel.png
+   :align: center
+   :alt: VMEM Arithmetic Instruction Mix panel in Omniperf Grafana
+   :width: 800
+
+   The types of vector memory (VMEM) instructions that were issued.
+
+.. tip::
+
+   See :ref:`vmem-instruction-mix` to learn about reported metrics.
+
+.. _grafana-panel-cu-compute-pipeline:
+
+Compute Unit - Compute Pipeline
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Speed-of-Light
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-sol_panel.png
+   :align: center
+   :alt: Speed-of-Light (CU) panel in Omniperf Grafana
+   :width: 800
+
+   The number of floating-point and integer operations executed on the vector
+   arithmetic logic unit (VALU) and Matrix Fused Multiply-Add (MFMA) units in
+   various precisions.
+
+.. tip::
+
+   See :ref:`compute-speed-of-light` to learn about reported metrics.
+
+Pipeline Stats
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-pipeline-stats_panel.png
+   :align: center
+   :alt: Pipeline Stats panel in Omniperf Grafana
+   :width: 800
+
+   More detailed metrics to analyze the several independent pipelines found in
+   the Compute Unit (CU).
+
+.. tip::
+
+   See :ref:`pipeline-stats` to learn about reported metrics.
+
+Arithmetic Operations
++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/cu-arith-ops_panel.png
+   :align: center
+   :alt: Arithmetic Operations panel in Omniperf Grafana
+   :width: 800
+
+   The total number of floating-point and integer operations executed in various
+   precisions.
+
+.. tip::
+
+   See :ref:`arithmetic-operations` to learn about reported metrics.
+
+.. _grafana-panel-lds:
+
+Local Data Share (LDS)
+^^^^^^^^^^^^^^^^^^^^^^
+
+Speed-of-Light
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/lds-sol_panel.png
+   :align: center
+   :alt: Speed-of-Light (LDS) panel in Omniperf Grafana
+   :width: 800
+
+   Key metrics for the Local Data Share (LDS) as a comparison with the peak
+   achievable values of those metrics.
+
+.. tip::
+
+   See :ref:`lds-sol` to learn about reported metrics.
+
+LDS Stats
++++++++++
+
+.. figure:: ../../data/analyze/grafana/lds-stats_panel.png
+   :align: center
+   :alt: LDS Stats panel in Omniperf Grafana
+   :width: 800
+
+   More detailed view of the Local Data Share (LDS) performance.
+
+.. tip::
+
+   See :ref:`lds-stats` to learn about reported metrics.
+
+.. _grafana-panel-instruction-cache:
+
+Instruction Cache
+^^^^^^^^^^^^^^^^^
+
+Speed-of-Light
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/instr-cache-sol_panel.png
+   :align: center
+   :alt: Speed-of-Light (instruction cache) panel in Omniperf Grafana
+   :width: 800
+
+   Key metrics of the L1 Instruction (L1I) cache as a comparison with the peak
+   achievable values of those metrics.
+
+.. tip::
+
+   See :ref:`desc-l1i-sol` to learn about reported metrics.
+
+Instruction Cache Stats
++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/instr-cache-accesses_panel.png
+   :align: center
+   :alt: Instruction Cache Stats panel in Omniperf Grafana
+   :width: 800
+
+   More detail on the hit/miss statistics of the L1 Instruction (L1I) cache.
+
+.. tip::
+
+   See :ref:`desc-l1i-stats` to learn about reported metrics.
+
+.. _grafana-panel-sl1d-cache:
+
+Scalar L1D Cache
+^^^^^^^^^^^^^^^^
+
+.. tip::
+
+   See :ref:`desc-sl1d` to learn about reported metrics.
+
+Speed-of-Light
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/sl1d-sol_panel.png
+   :align: center
+   :alt: Speed-of-Light (SL1D) panel in Omniperf Grafana
+   :width: 800
+
+   Key metrics of the Scalar L1 Data (sL1D) cache as a comparison with the peak
+   achievable values of those metrics.
+
+.. tip::
+
+   See :ref:`desc-sl1d-sol` to learn about reported metrics.
+
+Scalar L1D Cache Accesses
++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/sl1d-cache-accesses_panel.png
+   :align: center
+   :alt: Scalar L1D Cache Accesses panel in Omniperf Grafana
+   :width: 800
+
+   More detail on the types of accesses made to the Scalar L1 Data (sL1D) cache,
+   and the hit/miss statistics.
+
+.. tip::
+
+   See :ref:`desc-sl1d-stats` to learn about reported metrics.
+
+Scalar L1D Cache - L2 Interface
++++++++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/sl1d-l12-interface_panel.png
+   :align: center
+   :alt: Scalar L1D Cache - L2 Interface panel in Omniperf Grafana
+   :width: 800
+
+   More detail on the data requested across the Scalar L1 Data (sL1D) cache <->
+   L2 interface.
+
+.. tip::
+
+   See :ref:`desc-sl1d-l2-interface` to learn about reported metrics.
+
+.. _grafana-panel-ta:
+
+Texture Addresser and Texture Data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Texture Addresser
++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/ta_panel.png
+   :align: center
+   :alt: Texture Addresser in Omniperf Grafana
+   :width: 800
+
+   Metrics specific to the texture addresser (TA), which receives commands
+   (e.g., instructions) and write/atomic data from the Compute Unit (CU), and
+   coalesces them into fewer requests for the cache to process.
+
+.. tip::
+
+   See :ref:`desc-ta` to learn about reported metrics.
+
+.. _grafana-panel-td:
+
+Texture Data
+++++++++++++
+
+.. figure:: ../../data/analyze/grafana/td_panel.png
+   :align: center
+   :alt: Texture Data panel in Omniperf Grafana
+   :width: 800
+
+   Metrics specific to texture data (TD) which routes data back to the
+   requesting Compute Unit (CU).
+
+.. tip::
+
+   See :ref:`desc-td` to learn about reported metrics.
+
+.. _grafana-panel-vl1d:
+
+Vector L1 Data Cache
+^^^^^^^^^^^^^^^^^^^^
+
+Speed-of-Light
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/vl1d-sol_panel.png
+   :align: center
+   :alt: Speed-of-Light (VL1D) panel in Omniperf Grafana
+   :width: 800
+
+   Key metrics of the vector L1 data (vL1D) cache as a comparison with the peak
+   achievable values of those metrics.
+
+.. tip::
+
+   See :ref:`vl1d-sol` to learn about reported metrics.
+
+L1D Cache Stalls
+++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/vl1d-cache-stalls_panel.png
+   :align: center
+   :alt: L1D Cache Stalls panel in Omniperf Grafana
+   :width: 800
+
+   More detail on where vector L1 data (vL1D) cache is stalled in the pipeline,
+   which may indicate performance limiters of the cache.
+
+.. tip::
+
+   See :ref:`vl1d-cache-stall-metrics` to learn about reported metrics.
+
+L1D Cache Accesses
+++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/vl1d-cache-accesses_panel.png
+   :align: center
+   :alt: L1D Cache Accesses
+   :width: 800
+
+   The type of requests incoming from the cache front-end, the number of requests
+   that were serviced by the vector L1 data (vL1D) cache, and the number & type
+   of outgoing requests to the L2 cache.
+
+.. tip::
+
+   See :ref:`vl1d-cache-access-metrics` to learn about reported metrics.
+
+L1D - L2 Transactions
++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/vl1d-l2-transactions_panel.png
+   :align: center
+   :alt: L1D - L2 Transactions in Omniperf Grafana
+   :width: 800
+
+   A more granular look at the types of requests made to the L2 cache.
+
+.. tip::
+
+   See :ref:`vl1d-l2-transaction-detail` to learn more.
+
+L1D Addr Translation
+++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/vl1d-addr-translation_panel.png
+   :align: center
+   :alt: L1D Addr Translation panel in Omniperf Grafana
+   :width: 800
+
+   After a vector memory instruction has been processed/coalesced by the address
+   processing unit of the vector L1 data (vL1D) cache, it must be translated
+   from a virtual to physical address. These metrics provide more details on the
+   L1 Translation Lookaside Buffer (TLB) which handles this process.
+
+.. tip::
+
+   See :ref:`desc-utcl1` to learn about reported metrics.
+
+.. _grafana-panel-l2-cache:
+
+L2 Cache
+^^^^^^^^
+
+.. tip::
+
+   See :doc:`/conceptual/l2-cache` to learn about reported metrics.
+
+Speed-of-Light
+++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/l2-sol_panel.png
+   :align: center
+   :alt: Speed-of-Light (L2 cache) panel in Omniperf Grafana
+   :width: 800
+
+   Key metrics about the performance of the L2 cache, aggregated over all the
+   L2 channels, as a comparison with the peak achievable values of those
+   metrics.
+
+.. tip::
+
+   See :ref:`l2-sol` to learn about reported metrics.
+
+L2 Cache Accesses
++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/l2-accesses_panel.png
+   :align: center
+   :alt: L2 Cache Accesses panel in Omniperf Grafana
+   :width: 800
+
+   Incoming requests to the L2 cache from the vector L1 data (vL1D) cache and
+   other clients (e.g., the sL1D and L1I caches).
+
+.. tip::
+
+   See :ref:`l2-cache-accesses` to learn about reported metrics.
+
+L2 - Fabric Transactions
+++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/l2-fabric-transactions_panel.png
+   :align: center
+   :alt: L2 - Fabric Transactions panel in Omniperf Grafana
+   :width: 800
+
+   More detail on the flow of requests through Infinity Fabric™.
+
+.. tip::
+
+   See :ref:`l2-fabric` to learn about reported metrics.
+
+L2 - Fabric Interface Stalls
+++++++++++++++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/l2-fabric-interface-stalls_panel.png
+   :align: center
+   :alt: L2 - Fabric Interface Stalls panel in Omniperf Grafana
+   :width: 800
+
+   A breakdown of what types of requests in a kernel caused a stall
+   (e.g., read vs write), and to which locations (e.g., to the accelerator’s
+   local memory, or to remote accelerators/CPUs).
+
+.. tip::
+
+   See :ref:`l2-fabric-stalls` to learn about reported metrics.
+
+.. _grafana-panel-l2-cache-per-channel:
+
+L2 Cache Per Channel
+^^^^^^^^^^^^^^^^^^^^
+
+.. tip::
+
+   See :ref:`l2-sol` for more information.
+
+Aggregate Stats
++++++++++++++++
+
+.. figure:: ../../data/analyze/grafana/l2-per-channel-agg-stats_panel.png
+   :align: center
+   :alt: Aggregate Stats (L2 cache per channel) panel in Omniperf Grafana
+   :width: 800
+
+   L2 Cache per channel performance at a glance. Metrics are aggregated over all available channels.
diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst b/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst
new file mode 100644
index 0000000000..b34e1214c4
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst
@@ -0,0 +1,36 @@
+.. meta::
+   :description: How to use Omniperf's analyze mode
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              Grafana, analysis, analyze mode
+
+************
+Analyze mode
+************
+
+Omniperf offers several ways to interact with the metrics it generates from
+profiling. Your level of familiarity with the profiled application, computing
+environment, and experience with Omniperf should inform the analysis method you
+choose.
+
+While analyzing with the CLI offers quick and straightforward access to Omniperf
+metrics from the terminal, Grafana's dashboard GUI adds an extra layer of
+readability and interactivity you might prefer.
+
+See the following sections to explore Omniperf's analysis and visualization
+options.
+
+* :doc:`cli`
+* :doc:`grafana-gui`
+* :doc:`standalone-gui`
+
+.. note::
+
+   Analysis examples in this chapter borrow profiling results from the
+   ``vcopy.cpp`` workload introduced in :ref:`profile-example` in the
+   previous chapter.
+
+   Unless otherwise noted, the performance analysis is done on the
+   :ref:`MI200 platform <def-soc>`.
+
+Learn about profiling with Omniperf in :doc:`../profile/mode`. For an overview of
+Omniperf's other modes, see :ref:`modes`.
diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst
new file mode 100644
index 0000000000..a6a3e26f36
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst
@@ -0,0 +1,89 @@
+.. meta::
+   :description: Omniperf analysis: Standalone GUI
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, GUI, standalone, filter
+
+***********************
+Standalone GUI analysis
+***********************
+
+Omniperf's standalone analysis GUI is a lightweight web page that you can
+generate straight from the command line. The standalone analysis GUI is an
+alternative to the CLI if you want to explore profiling results visually, but
+without the additional setup requirements or server-side overhead of Omniperf's
+detailed :doc:`Grafana interface <grafana-gui>` option. This analysis
+option is implemented as a simple `Flask <https://flask.palletsprojects.com>`_
+application that lets you view results from your preferred web browser.
+
+.. note::
+
+   A point on *port forwarding*: the standalone GUI analyzer publishes its
+   web-based interface on port ``8050`` by default. On production HPC systems
+   where profiling jobs run under the control of a resource manager, additional
+   SSH tunneling between the desired web browser host (such as a login node or
+   remote workstation) and compute host may be required. Alternatively, you
+   might find it more convenient to download profiled workloads to perform
+   analysis on a local system.
+
+   See the :doc:`/reference/faq` for more details on SSH tunneling.
+
+Launch the standalone GUI analyzer
+----------------------------------
+
+To launch the Omniperf GUI analyzer, include the ``--gui`` flag with your
+desired analysis command. For example:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/vcopy/MI200/ --gui
+
+     ___                  _                  __ 
+    / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+   | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+   | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+    \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                           |_|                  
+
+   Analysis mode = web_ui
+   [analysis] deriving Omniperf metrics...
+   Dash is running on http://0.0.0.0:8050/
+
+    * Serving Flask app 'omniperf_analyze.analysis_webui' (lazy loading)
+    * Environment: production
+      WARNING: This is a development server. Do not use it in a production deployment.
+      Use a production WSGI server instead.
+    * Debug mode: off
+    * Running on all addresses (0.0.0.0)
+      WARNING: This is a development server. Do not use it in a production deployment.
+    * Running on http://127.0.0.1:8050
+    * Running on http://10.228.33.172:8050 (Press CTRL+C to quit)
+
+At this point, you can launch your web browser of choice and navigate to
+``http://localhost:8050/`` to view the analysis interface.
+
+.. image:: ../../data/analyze/standalone_gui.png
+   :align: center
+   :alt: Omniperf standalone GUI home screen
+   :width: 800
+
+.. tip::
+
+   To launch the standalone GUI analyzer web app on a port other than ``8050``,
+   include the optional argument ``--gui <desired port>``.
+
+When no filters are applied, you'll see five basic sections derived from your
+application's profiling data:
+
+#. Memory Chart Analysis
+#. Empirical Roofline Analysis
+#. Top Stats (Top Kernel Statistics)
+#. System Info
+#. System Speed-of-Light
+
+To dive deeper, use the dropdown menus at the top of the screen to isolate
+particular kernels or dispatches. You should see the web page update with
+metrics specific to your selected filters.
+
+Once a filter is applied, you'll see several additional sections become
+available with detailed metrics specific to that area of AMD hardware. These
+detailed sections mirror the data displayed in Omniperf's
+:doc:`Grafana interface <grafana-gui>`.
diff --git a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst
new file mode 100644
index 0000000000..de23a801ba
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst
@@ -0,0 +1,455 @@
+.. meta::
+   :description: How to use Omniperf's profile mode
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              profiling, profile mode
+
+************
+Profile mode
+************
+
+The following chapter walks you through Omniperf's core profiling features by
+example.
+
+Learn about analysis with Omniperf in :doc:`../analyze/mode`. For an overview of
+Omniperf's other modes, see :ref:`modes`.
+
+Profiling
+=========
+
+Use the ``omniperf`` executable to acquire all necessary performance monitoring
+data through analysis of compute workloads.
+
+Profiling with Omniperf yields the following benefits.
+
+* :ref:`Automate counter collection <profiling-routine>`: Omniperf handles all
+  of your profiling via pre-configured input files.
+
+* :ref:`Filtering <filtering>`: Apply runtime filters to speed up the profiling
+  process.
+
+* :ref:`Standalone roofline <standalone-roofline>`: Isolate a subset of built-in
+  metrics or build your own profiling configuration.
+
+Run ``omniperf profile -h`` for more details. See
+:ref:`Basic usage <modes-profile>`.
+
+.. _profile-example:
+
+Profiling example
+-----------------
+
+The `<https://github.com/ROCm/omniperf/blob/main/sample/vcopy.cpp>`__ repository
+includes source code for a sample GPU compute workload, ``vcopy.cpp``. A copy of
+this file is available in the ``share/sample`` subdirectory after a normal
+Omniperf installation, or via the ``$OMNIPERF_SHARE/sample`` directory when
+using the supplied modulefile.
+
+The examples in this section use a compiled version of the ``vcopy`` workload to
+demonstrate the use of Omniperf in MI accelerator performance analysis. Unless
+otherwise noted, the performance analysis is done on the
+:ref:`MI200 platform <def-soc>`.
+
+Workload compilation
+^^^^^^^^^^^^^^^^^^^^
+
+The following example demonstrates compilation of ``vcopy``.
+
+.. code-block:: shell
+
+   $ hipcc vcopy.cpp -o vcopy
+   $ ls
+   vcopy   vcopy.cpp
+   $ ./vcopy -n 1048576 -b 256
+   vcopy testing on GCD 0
+   Finished allocating vectors on the CPU
+   Finished allocating vectors on the GPU
+   Finished copying vectors to the GPU
+   sw thinks it moved 1.000000 KB per wave
+   Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384:
+   Launching the  kernel on the GPU
+   Finished executing kernel
+   Finished copying the output vector from the GPU to the CPU
+   Releasing GPU memory
+   Releasing CPU memory
+
+The following sample command profiles the ``vcopy`` workload.
+
+.. code-block:: shell
+
+   $ omniperf profile --name vcopy -- ./vcopy -n 1048576 -b 256
+
+     ___                  _                  __ 
+    / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+   | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+   | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+    \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                           |_|                  
+
+   Omniperf version: 2.0.0
+   Profiler choice: rocprofv1
+   Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   Target: MI200
+   Command: ./vcopy -n 1048576 -b 256
+   Kernel Selection: None
+   Dispatch Selection: None
+   Hardware Blocks: All
+
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   Collecting Performance Counters
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   [profiling] Current input file: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200/perfmon/SQ_IFETCH_LEVEL.txt
+      |-> [rocprof] RPL: on '240312_174329' from '/opt/rocm-5.2.1' in '/home/auser/repos/omniperf/src/omniperf'
+      |-> [rocprof] RPL: profiling '""./vcopy -n 1048576 -b 256""'
+      |-> [rocprof] RPL: input file '/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/perfmon/SQ_IFETCH_LEVEL.txt'
+      |-> [rocprof] RPL: output dir '/tmp/rpl_data_240312_174329_692890'
+      |-> [rocprof] RPL: result dir '/tmp/rpl_data_240312_174329_692890/input0_results_240312_174329'
+      |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_240312_174329_692890/input0.xml"
+      |-> [rocprof] gpu_index =
+      |-> [rocprof] kernel =
+      |-> [rocprof] range =
+      |-> [rocprof] 6 metrics
+      |-> [rocprof] GRBM_COUNT, GRBM_GUI_ACTIVE, SQ_WAVES, SQ_IFETCH, SQ_IFETCH_LEVEL, SQ_ACCUM_PREV_HIRES
+      |-> [rocprof] vcopy testing on GCD 0
+      |-> [rocprof] Finished allocating vectors on the CPU
+      |-> [rocprof] Finished allocating vectors on the GPU
+      |-> [rocprof] Finished copying vectors to the GPU
+      |-> [rocprof] sw thinks it moved 1.000000 KB per wave
+      |-> [rocprof] Total threads: 1048576, Grid Size: 4096 block Size:256, Wavefronts:16384:
+      |-> [rocprof] Launching the  kernel on the GPU
+      |-> [rocprof] Finished executing kernel
+      |-> [rocprof] Finished copying the output vector from the GPU to the CPU
+      |-> [rocprof] Releasing GPU memory
+      |-> [rocprof] Releasing CPU memory
+      |-> [rocprof] 
+     |-> [rocprof] ROCPRofiler: 1 contexts collected, output directory /tmp/rpl_data_240312_174329_692890/input0_results_240312_174329
+       |-> [rocprof] File '/home/auser/repos/omniperf/sample/workloads/vcopy/MI200/SQ_IFETCH_LEVEL.csv' is generating
+      |-> [rocprof] 
+   [profiling] Current input file: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200/perfmon/SQ_INST_LEVEL_LDS.txt
+
+   ...
+
+   [roofline] Checking for roofline.csv in /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   [roofline] No roofline data found. Generating...
+   Empirical Roofline Calculation
+   Copyright © 2022  Advanced Micro Devices, Inc. All rights reserved.
+   Total detected GPU devices: 4
+   GPU Device 0: Profiling...
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+  HBM BW, GPU ID: 0, workgroupSize:256, workgroups:2097152, experiments:100, traffic:8589934592 bytes, duration:6.2 ms, mean:1388.0 GB/sec, stdev=3.1 GB/sec
+     99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+  L2 BW, GPU ID: 0, workgroupSize:256, workgroups:8192, experiments:100, traffic:687194767360 bytes, duration:136.5 ms, mean:5020.8 GB/sec, stdev=16.5 GB/sec
+     99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+  L1 BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:26843545600 bytes, duration:2.9 ms, mean:9229.5 GB/sec, stdev=2.9 GB/sec
+     99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   LDS BW, GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, traffic:33554432000 bytes, duration:1.9 ms, mean:17645.6 GB/sec, stdev=20.1 GB/sec
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak FLOPs (FP32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:274877906944, duration:13.078 ms, mean:20986.9 GFLOPS, stdev=310.8 GFLOPS
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak FLOPs (FP64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:137438953472, duration:6.7 ms, mean:20408.029297.1 GFLOPS, stdev=2.7 GFLOPS
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak MFMA FLOPs (BF16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:12.6 ms, mean:170280.0 GFLOPS, stdev=22.3 GFLOPS
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak MFMA FLOPs (F16), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:2147483648000, duration:13.0 ms, mean:164733.6 GFLOPS, stdev=24.3 GFLOPS
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak MFMA FLOPs (F32), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:536870912000, duration:13.0 ms, mean:41399.6 GFLOPS, stdev=4.1 GFLOPS
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak MFMA FLOPs (F64), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, FLOP:268435456000, duration:6.5 ms, mean:41379.2 GFLOPS, stdev=4.4 GFLOPS
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+   Peak MFMA IOPs (I8), GPU ID: 0, workgroupSize:256, workgroups:16384, experiments:100, IOP:2147483648000, duration:12.9 ms, mean:166281.9 GOPS, stdev=2495.9 GOPS
+   GPU Device 1: Profiling...
+   ...
+   GPU Device 2: Profiling...
+   ...
+   GPU Device 3: Profiling...
+   ...
+
+.. tip::
+
+   To reduce verbosity of profiling output try the ``--quiet`` flag. This hides
+   ``rocprof`` output and activates a progress bar.
+
+.. _profiling-routine:
+
+Notice the two main stages in Omniperf's *default* profiling routine.
+
+1. The first stage collects all the counters needed for Omniperf analysis
+   (omitting any filters you have provided).
+
+2. The second stage collects data for the roofline analysis (this stage can be
+   disabled using ``--no-roof``).
+
+At the end of profiling, you can find all resulting ``csv`` files in a
+:ref:`SoC <def-soc>`-specific target directory; for
+example:
+
+* "MI300A" or "MI300X" for the AMD Instinct™ MI300 family of accelerators
+* "MI200" for the AMD Instinct MI200 family of accelerators
+* "MI100" for the AMD Instinct MI100 family of accelerators
+
+The SoC names are generated as a part of Omniperf, and do not *always*
+distinguish between different accelerators in the same family; for instance,
+an Instinct MI210 vs an Instinct MI250.
+
+.. note::
+
+   Additionally, you will notice a few extra files. An SoC parameters file, 
+   ``sysinfo.csv``, is created to reflect the target device settings. All
+   profiling output is stored in ``log.txt``. Roofline-specific benchmark
+   results are stored in ``roofline.csv``.
+
+.. code-block:: shell
+
+   $ ls workloads/vcopy/MI200/
+   total 112
+   total 60
+   -rw-r--r-- 1 auser agroup 27937 Mar  1 15:15 log.txt
+   drwxr-xr-x 1 auser agroup     0 Mar  1 15:15 perfmon
+   -rw-r--r-- 1 auser agroup 26175 Mar  1 15:15 pmc_perf.csv
+   -rw-r--r-- 1 auser agroup  1708 Mar  1 15:17 roofline.csv
+   -rw-r--r-- 1 auser agroup   519 Mar  1 15:15 SQ_IFETCH_LEVEL.csv
+   -rw-r--r-- 1 auser agroup   456 Mar  1 15:15 SQ_INST_LEVEL_LDS.csv
+   -rw-r--r-- 1 auser agroup   474 Mar  1 15:15 SQ_INST_LEVEL_SMEM.csv
+   -rw-r--r-- 1 auser agroup   474 Mar  1 15:15 SQ_INST_LEVEL_VMEM.csv
+   -rw-r--r-- 1 auser agroup   599 Mar  1 15:15 SQ_LEVEL_WAVES.csv
+   -rw-r--r-- 1 auser agroup   650 Mar  1 15:15 sysinfo.csv
+   -rw-r--r-- 1 auser agroup   399 Mar  1 15:15 timestamps.csv
+
+.. _filtering:
+
+Filtering
+=========
+
+To reduce profiling time and the counters collected, you should use profiling
+filters. Profiling filters and their functionality depend on the underlying
+profiler being used. While Omniperf is profiler-agnostic, the following is a
+detailed description of profiling filters available when using Omniperf with
+:doc:`ROCProfiler <rocprofiler:index>`.
+
+Filtering options
+-----------------
+
+``-b``, ``--block <block-name>``
+   Allows system profiling on one or more selected hardware components to speed
+   up the profiling process. See :ref:`profiling-hw-component-filtering`.
+
+``-k``, ``--kernel <kernel-substr>``
+   Allows for kernel filtering. Usage is equivalent with the current ``rocprof``
+   utility. See :ref:`profiling-kernel-filtering`.
+
+``-d``, ``--dispatch <dispatch-id>``
+   Allows for dispatch ID filtering. Usage is equivalent with the current
+   ``rocprof`` utility. See :ref:`profiling-dispatch-filtering`.
+
+.. tip::
+
+   Be cautious when combining different profiling filters in the same call.
+   Conflicting filters may result in an error.
+
+   For example, an error can occur if you filter on a dispatch that doesn't
+   match your kernel name filter.
+
+.. _profiling-hw-component-filtering:
+
+Hardware component filtering
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can profile specific hardware components to speed up the profiling process.
+In Omniperf, the term hardware block refers to a hardware component or a
+group of hardware components. All profiling results are accumulated in the same
+target directory without overwriting those for other hardware components. This
+enables incremental profiling and analysis.
+
+The following example only gathers hardware counters for the shader sequencer
+(SQ) and L2 cache (TCC) components, skipping all other hardware components.
+
+.. code-block:: shell
+
+   $ omniperf profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256
+
+     ___                  _                  __ 
+    / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+   | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+   | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+    \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                           |_|                  
+
+   fname: pmc_cpc_perf: Skipped
+   fname: pmc_spi_perf: Skipped
+   fname: pmc_cpf_perf: Skipped
+   fname: pmc_tcp_perf: Skipped
+   fname: pmc_sq_perf4: Added
+   fname: pmc_tcc_perf: Added
+   fname: pmc_sq_perf8: Added
+   fname: pmc_ta_perf: Skipped
+   fname: pmc_sq_perf1: Added
+   fname: pmc_sq_perf3: Added
+   fname: pmc_td_perf: Skipped
+   fname: pmc_tcc2_perf: Skipped
+   fname: pmc_sqc_perf1: Skipped
+   fname: pmc_sq_perf6: Added
+   fname: pmc_sq_perf2: Added
+   Omniperf version: 2.0.0
+   Profiler choice: rocprofv1
+   Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   Target: MI200
+   Command: ./vcopy -n 1048576 -b 256
+   Kernel Selection: None
+   Dispatch Selection: None
+   Hardware Blocks: ['sq', 'tcc']
+
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   Collecting Performance Counters
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   ...
+
+.. _profiling-kernel-filtering:
+
+Kernel filtering
+^^^^^^^^^^^^^^^^
+
+Kernel filtering is based on the name of the kernels you want to isolate. Use a
+kernel name substring list to isolate desired kernels.
+
+The following example demonstrates profiling while isolating the kernel
+matching the substring ``vecCopy``.
+
+.. code-block:: shell
+
+   $ omniperf profile --name vcopy -k vecCopy -- ./vcopy -n 1048576 -b 256
+
+     ___                  _                  __ 
+    / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+   | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+   | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+    \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                           |_|                  
+
+   Omniperf version: 2.0.0
+   Profiler choice: rocprofv1
+   Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   Target: MI200
+   Command: ./vcopy -n 1048576 -b 256
+   Kernel Selection: ['vecCopy']
+   Dispatch Selection: None
+   Hardware Blocks: All
+
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   Collecting Performance Counters
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   ...
+
+.. _profiling-dispatch-filtering:
+
+Dispatch filtering
+^^^^^^^^^^^^^^^^^^
+
+Dispatch filtering is based on the *global* dispatch index of kernels in a run. 
+
+The following example profiles only the first kernel dispatch in the execution
+of the application (note zero-based indexing).
+
+.. code-block:: shell
+
+   $ omniperf profile --name vcopy -d 0 -- ./vcopy -n 1048576 -b 256
+
+     ___                  _                  __ 
+    / _ \ _ __ ___  _ __ (_)_ __   ___ _ __ / _|
+   | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ 
+   | |_| | | | | | | | | | | |_) |  __/ |  |  _|
+    \___/|_| |_| |_|_| |_|_| .__/ \___|_|  |_|  
+                           |_|                  
+
+   Omniperf version: 2.0.0
+   Profiler choice: rocprofv1
+   Path: /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   Target: MI200
+   Command: ./vcopy -n 1048576 -b 256
+   Kernel Selection: None
+   Dispatch Selection: ['0']
+   Hardware Blocks: All
+
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   Collecting Performance Counters
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   ...
+
+.. _standalone-roofline:
+
+Standalone roofline
+===================
+
+If you are only interested in generating roofline analysis data, try using
+``--roof-only``. This will only collect counters relevant to roofline, as well
+as generate a standalone ``.pdf`` output of your roofline plot. 
+
+Roofline options
+----------------
+
+``--sort <desired_sort>``
+   Allows you to specify whether you would like to overlay top kernel or top
+   dispatch data in your roofline plot.
+
+``-m``, ``--mem-level <cache_level>``
+   Allows you to specify specific levels of cache to include in your roofline
+   plot.
+
+``--device <gpu_id>``
+   Allows you to specify a device ID to collect performance data from when
+   running a roofline benchmark on your system.
+
+To distinguish different kernels in your ``.pdf`` roofline plot use
+``--kernel-names``. This will give each kernel a unique marker identifiable from
+the plot's key.
+
+
+Roofline only
+-------------
+
+The following example demonstrates profiling roofline data only:
+
+.. code-block:: shell
+
+   $ omniperf profile --name vcopy --roof-only -- ./vcopy -n 1048576 -b 256
+
+   ...
+   [roofline] Checking for roofline.csv in /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   [roofline] No roofline data found. Generating...
+   Checking for roofline.csv in /home/auser/repos/omniperf/sample/workloads/vcopy/MI200
+   Empirical Roofline Calculation
+   Copyright © 2022  Advanced Micro Devices, Inc. All rights reserved.
+   Total detected GPU devices: 4
+   GPU Device 0: Profiling...
+    99% [||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
+    ...
+   Empirical Roofline PDFs saved!
+
+An inspection of our workload output folder shows ``.pdf`` plots were generated
+successfully.
+
+.. code-block:: shell
+
+   $ ls workloads/vcopy/MI200/
+   total 48
+   -rw-r--r-- 1 auser agroup 13331 Mar  1 16:05 empirRoof_gpu-0_fp32_fp64.pdf
+   -rw-r--r-- 1 auser agroup 13136 Mar  1 16:05 empirRoof_gpu-0_int8_fp16.pdf
+   drwxr-xr-x 1 auser agroup     0 Mar  1 16:03 perfmon
+   -rw-r--r-- 1 auser agroup  1101 Mar  1 16:03 pmc_perf.csv
+   -rw-r--r-- 1 auser agroup  1715 Mar  1 16:05 roofline.csv
+   -rw-r--r-- 1 auser agroup   650 Mar  1 16:03 sysinfo.csv
+   -rw-r--r-- 1 auser agroup   399 Mar  1 16:03 timestamps.csv
+
+.. note::
+
+   Omniperf generates two roofline outputs to organize results and reduce
+   clutter. One chart plots FP32/FP64 performance while the other plots I8/FP16
+   performance.
+
+The following image is a sample ``empirRoof_gpu-ALL_fp32_fp64.pdf`` roofline
+plot.
+
+.. image:: ../../data/profile/sample-roof-plot.png
+   :align: center
+   :alt: Sample Omniperf roofline output
+   :width: 800
+
diff --git a/projects/rocprofiler-compute/docs/how-to/use.rst b/projects/rocprofiler-compute/docs/how-to/use.rst
new file mode 100644
index 0000000000..7377dd9f95
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/how-to/use.rst
@@ -0,0 +1,251 @@
+.. meta::
+   :description: Omniperf basic usage
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              basics, usage, operations
+
+***********
+Basic usage
+***********
+
+The following section outlines basic Omniperf workflows, modes, options, and
+operations.
+
+Command line profiler
+=====================
+
+Launch and profile the target application using the command line profiler.
+
+The command line profiler launches the target application, calls the
+ROCProfiler API via the ``rocprof`` binary, and collects profile results for
+the specified kernels, dispatches, and hardware components. If not
+specified, Omniperf defaults to collecting all available counters for all
+kernels and dispatches launched by your executable.
+
+To collect the default set of data for all kernels in the target
+application, run a command such as the following:
+
+.. code-block:: shell
+
+   $ omniperf profile -n vcopy_data -- ./vcopy -n 1048576 -b 256
+
+This runs the app, launches each kernel, and generates profiling results. By
+default, results are written to a subdirectory with your accelerator's name;
+for example, ``./workloads/vcopy_data/MI200/``, where name is configurable
+via the ``-n`` argument.
+
+.. note::
+
+   To collect all requested profile information, Omniperf might replay kernels
+   multiple times.
+
+.. _basic-filter-data-collection:
+
+Customize data collection
+-------------------------
+
+Options are available to specify for which kernels and metrics data should be
+collected. Note that you can apply filtering in either the profiling or
+analysis stage. Filtering at profiling collection often speeds up your
+aggregate profiling run time.
+
+Common filters to customize data collection include:
+
+``-k``, ``--kernel``
+   Enables filtering kernels by name.
+
+``-d``, ``--dispatch``
+   Enables filtering based on dispatch ID.
+
+``-b``, ``--block``
+   Enables collecting metrics for only the specified (one or more) hardware
+   component blocks.
+
+See :ref:`Filtering <filtering>` for an in-depth walkthrough.
+
+To view available metrics by hardware block, use the ``--list-metrics``
+argument:
+
+.. code-block:: shell
+
+   $ omniperf analyze --list-metrics <sys_arch>
+
+.. _basic-analyze-cli:
+
+Analyze in the command line
+---------------------------
+
+After generating a local output folder (for example,
+``./workloads/vcopy_data/MI200``), use the command line tool to quickly
+interface with profiling results. View different metrics derived from your
+profiled results and get immediate access to all metrics organized by hardware
+blocks.
+
+If you don't apply kernel, dispatch, or hardware block filters at this stage,
+analysis is reflective of the entirety of the profiling data.
+
+To interact with profiling results from a different session, provide the
+workload path.
+
+``-p``, ``--path``
+   Enables you to analyze existing profiling data in the Omniperf CLI.
+
+See :doc:`analyze/cli` for more detailed information.
+
+.. _basic-analyze-grafana:
+
+Analyze in the Grafana GUI
+--------------------------
+
+To conduct a more in-depth analysis of profiling results, it's suggested to use
+a Grafana GUI with Omniperf. To interact with profiling results, import your
+data to the MongoDB instance included in the Omniperf Dockerfile. See
+:doc:`/install/grafana-setup`.
+
+To interact with Grafana data, stored in the Omniperf database, enter
+``database`` :ref:`mode <modes-database>`; for example:
+
+.. code-block:: shell
+
+   $ omniperf database --import [CONNECTION OPTIONS]
+
+See :doc:`/how-to/analyze/grafana-gui` for more detailed information.
+
+.. _modes:
+
+Modes
+=====
+
+Modes change the fundamental behavior of the Omniperf command line tool.
+Depending on which mode you choose, different command line options become
+available.
+
+.. _modes-profile:
+
+Profile mode
+------------
+
+``profile``
+   Launches the target application on the local system using
+   :doc:`ROCProfiler <rocprofiler:index>`. Depending on the profiling options
+   chosen, selected kernels, dispatches, and/or hardware components used by the
+   application are profiled. It stores results locally in an output folder:
+   ``./workloads/<name>``.
+
+   .. code-block:: shell
+
+      $ omniperf profile --help
+
+See :doc:`profile/mode` to learn about this mode in depth and to get started
+profiling with Omniperf.
+
+.. _modes-analyze:
+
+Analyze mode
+------------
+
+``analyze``
+   Loads profiling data from the ``--path`` (``-p``) directory into the Omniperf
+   CLI analyzer where you have immediate access to profiling results and
+   generated metrics. It generates metrics from the entirety of your profiled
+   application or a subset identified through the Omniperf CLI analysis filters.
+
+   To generate a lightweight GUI interface, you can add the ``--gui`` flag to your
+   analysis command.
+
+   This mode is a middle ground to the highly detailed Omniperf Grafana GUI and
+   is great if you want immediate access to a hardware component you’re already
+   familiar with.
+
+   .. code-block:: shell
+
+      $ omniperf analyze --help
+
+See :doc:`analyze/mode` to learn about this mode in depth and to get started
+with analysis using Omniperf.
+
+.. _modes-database:
+
+Database mode
+-------------
+
+``database``
+   The Grafana analyzer GUI is built on a MongoDB database. ``--import``
+   profiling results to the DB to interact with the workload in Grafana or
+   ``--remove`` the workload from the DB.
+
+   Connection options need to be specified. See :doc:`/how-to/analyze/grafana-gui` for
+   more details.
+
+   .. code-block:: shell
+
+      $ omniperf database --help
+
+See :doc:`/install/grafana-setup` to learn about setting up a Grafana server and
+database instance to make your profiling data more digestible and shareable.
+
+.. _global-options:
+
+Global options
+==============
+
+The Omniperf command line tool has a set of *global* utility options that are
+available across all modes. 
+
+``-v``, ``--version``
+   Prints the Omniperf version and exits.
+
+``-V``, ``--verbose``
+   Increases output verbosity. Use multiple times for higher levels of
+   verbosity.
+
+``-q``, ``--quiet``
+   Reduces output verbosity and runs quietly.
+
+``-s``, ``--specs``
+   Prints system specs and exits.
+
+.. note::
+
+   Omniperf also recognizes the project variable ``OMNIPERF_COLOR``, should you
+   choose to disable colorful output. To disable default colorful behavior, set
+   this variable to ``0``.
+
+.. _basic-operations:
+
+Basic operations
+================
+
+The following table lists Omniperf's basic operations, their
+:ref:`modes <modes>`, and required arguments.
+
+.. list-table::
+   :header-rows: 1
+
+   * - Operation description
+     - Mode
+     - Required arguments
+
+   * - :doc:`Profile a workload </how-to/profile/mode>`
+     - ``profile``
+     - ``--name``, ``-- <profile_cmd>``
+
+   * - :ref:`Standalone roofline analysis <standalone-roofline>`
+     - ``profile``
+     - ``--name``, ``--roof-only``, ``-- <profile_cmd>``
+
+   * - :ref:`Import a workload to database <grafana-gui-import>`
+     - ``database``
+     - ``--import``, ``--host``, ``--username``, ``--workload``, ``--team``
+
+   * - :ref:`Remove a workload from database <grafana-gui-remove>`
+     - ``database``
+     - ``--remove``, ``--host``, ``--username``, ``--workload``, ``--team``
+
+   * - :doc:`Launch standalone GUI from CLI </how-to/analyze/standalone-gui>`
+     - ``analyze``
+     - ``--path``, ``--gui``
+
+   * - :doc:`Interact with profiling results from CLI </how-to/analyze/cli>`
+     - ``analyze``
+     - ``--path``
+
diff --git a/projects/rocprofiler-compute/docs/index.rst b/projects/rocprofiler-compute/docs/index.rst
new file mode 100644
index 0000000000..1df329e7de
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/index.rst
@@ -0,0 +1,87 @@
+.. meta::
+   :description: Omniperf documentation and reference
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD
+
+**********************
+Omniperf documentation
+**********************
+
+Omniperf documentation provides a comprehensive overview of Omniperf.
+In addition to a full deployment guide with installation instructions, this
+documentation also explains the ideas motivating the design behind the tool and
+its components.
+
+If you're new to Omniperf, familiarize yourself with the tool by reviewing the
+chapters that follow and gradually learn its more advanced features. To get
+started, see :doc:`What is Omniperf? <what-is-omniperf>`.
+
+Omniperf is open source and hosted at `<https://github.com/ROCm/omniperf>`__.
+
+.. grid:: 2
+   :gutter: 3
+
+   .. grid-item-card:: Install
+
+      * :doc:`install/core-install`
+      * :doc:`Grafana server for Omniperf <install/grafana-setup>`
+
+   .. grid-item::
+
+Use the following topics to learn more about the advantages of Omniperf in your
+development toolkit, how it aims to model performance, and how to use Omniperf
+in practice.
+
+.. grid:: 2
+   :gutter: 3
+
+   .. grid-item-card:: How to
+
+      * :doc:`how-to/use`
+
+      * :doc:`how-to/profile/mode`
+
+      * :doc:`how-to/analyze/mode`
+
+        * :doc:`how-to/analyze/cli`
+
+        * :doc:`how-to/analyze/grafana-gui`
+
+        * :doc:`how-to/analyze/standalone-gui`
+
+   .. grid-item-card:: Conceptual
+
+      * :doc:`conceptual/performance-model`
+
+        * :doc:`conceptual/compute-unit`
+
+        * :doc:`conceptual/l2-cache`
+
+        * :doc:`conceptual/shader-engine`
+
+        * :doc:`conceptual/command-processor`
+
+        * :doc:`conceptual/system-speed-of-light`
+
+      * :doc:`conceptual/definitions`
+
+        * :ref:`normalization-units`
+
+   .. grid-item-card:: Tutorials
+
+      * :doc:`tutorial/profiling-by-example`
+
+      * :doc:`Learning resources <tutorial/learning-resources>`
+
+   .. grid-item-card:: Reference
+
+      * :doc:`reference/compatible-accelerators`
+
+      * :doc:`reference/faq`
+
+This project is proudly open source. For more details on how to contribute,
+refer to
+`Contributing to ROCm <https://rocm.docs.amd.com/en/latest/contribute/contributing.html>`_.
+
+Find ROCm licensing information on the
+`Licensing <https://rocm.docs.amd.com/en/latest/about/license.html>`_ page.
+
diff --git a/projects/rocprofiler-compute/docs/install/core-install.rst b/projects/rocprofiler-compute/docs/install/core-install.rst
new file mode 100644
index 0000000000..1d28b07b5d
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/install/core-install.rst
@@ -0,0 +1,236 @@
+.. meta::
+   :description: Omniperf installation and deployment
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              install, deploy, Grafana, client, configuration, modulefiles
+
+*********************************
+Installing and deploying Omniperf
+*********************************
+
+Omniperf consists of two installation components.
+
+* :ref:`Omniperf core installation <core-install>` (client-side)
+
+  * Provides the core application profiling capability.
+  * Allows the collection of performance counters, filtering by hardware
+    block, dispatch, kernel, and more.
+  * Provides a CLI-based analysis mode.
+  * Provides a standalone web interface for importing analysis metrics.
+
+* :doc:`Grafana server for Omniperf <grafana-setup>` (server-side) (*optional*)
+
+  * Hosts the MongoDB backend and Grafana instance.
+  * Is packaged in a Docker container for easy setup.
+
+Determine what you need to install based on how you would like to interact with
+Omniperf. See the following decision tree to help determine what installation is
+right for you.
+
+.. image:: ../data/install/install-decision-tree.png
+   :align: center
+   :alt: Decision tree for installing and deploying Omniperf
+   :width: 800
+
+.. _core-install:
+ 
+Core installation
+=================
+
+The core Omniperf application requires the following basic software
+dependencies. As of ROCm 6.2, the core Omniperf is included with your ROCm
+installation.
+
+* Python ``>= 3.8``
+* CMake ``>= 3.19``
+* ROCm ``>= 5.7.1``
+
+Omniperf depends on a number of Python packages documented in the top-level
+``requirements.txt`` file. Install these *before* configuring Omniperf.
+
+.. tip::
+
+   If looking to build Omniperf as a developer, consider these additional
+   requirements.
+
+   .. list-table::
+
+       * - ``docs/sphinx/requirements.txt``
+         - Python packages required to build this documentation from source.
+
+       * - ``requirements-test.txt``
+         - Python packages required to run Omniperf's CI suite using PyTest.
+
+The recommended procedure for Omniperf usage is to install into a shared file
+system so that multiple users can access the final installation. The
+following steps illustrate how to install the necessary Python dependencies
+using `pip <https://packaging.python.org/en/latest/>`_ and Omniperf into a
+shared location controlled by the ``INSTALL_DIR`` environment variable.
+
+.. _core-install-cmake-vars:
+
+Configuration variables
+-----------------------
+The following installation example leverages several
+`CMake <https://cmake.org/cmake/help/latest>`_ project variables defined as
+follows.
+
+.. list-table::
+    :header-rows: 1
+
+    * - CMake variable
+      - Description
+
+    * - ``CMAKE_INSTALL_PREFIX``
+      - Controls the install path for Omniperf files.
+
+    * - ``PYTHON_DEPS``
+      - Specifies an optional path to resolve Python package dependencies.
+
+    * - ``MOD_INSTALL_PATH``
+      - Specifies an optional path for separate Omniperf modulefile installation.
+
+.. _core-install-steps:
+
+Install from source
+-------------------
+
+#. A typical install begins by downloading the latest release tarball available
+   from `<https://github.com/ROCm/omniperf/releases>`__. From there, untar and
+   navigate into the top-level directory.
+
+   ..
+      {{ config.version }} substitutes the Omniperf version in ../conf.py
+
+   .. datatemplate:nodata::
+
+      .. code-block:: shell
+
+         tar xfz omniperf-v{{ config.version }}.tar.gz
+         cd omniperf-v{{ config.version }}
+
+#. Next, install Python dependencies and complete the Omniperf configuration and
+   install process.
+
+   .. datatemplate:nodata::
+
+      .. code-block:: shell
+
+         # define top-level install path
+         export INSTALL_DIR=<your-top-level-desired-install-path>
+
+         # install python deps
+         python3 -m pip install -t ${INSTALL_DIR}/python-libs -r requirements.txt
+
+         # configure Omniperf for shared install
+         mkdir build
+         cd build
+         cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/{{ config.version }} \
+                 -DPYTHON_DEPS=${INSTALL_DIR}/python-libs \
+                 -DMOD_INSTALL_PATH=${INSTALL_DIR}/modulefiles ..
+
+         # install
+         make install
+
+   .. tip::
+
+      You might need to ``sudo`` the final installation step if you don't have
+      write access for the chosen installation path.
+
+#. Upon successful installation, your top-level installation directory should
+   look like this.
+
+   .. datatemplate:nodata::
+
+      .. code-block:: shell
+
+         $ ls $INSTALL_DIR
+         modulefiles  {{ config.version }}  python-libs
+
+.. _core-install-modulefiles:
+
+Execution using modulefiles
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The installation process includes the creation of an environment modulefile for
+use with `Lmod <https://lmod.readthedocs.io>`_. On systems that support Lmod,
+you can register the Omniperf modulefile directory and set up your environment
+for execution of Omniperf as follows.
+
+.. datatemplate:nodata::
+
+   .. code-block:: shell
+
+      $ module use $INSTALL_DIR/modulefiles
+      $ module load omniperf
+      $ which omniperf
+      /opt/apps/omniperf/{{ config.version }}/bin/omniperf
+
+      $ omniperf --version
+      ROC Profiler:   /opt/rocm-5.1.0/bin/rocprof
+
+      omniperf (v{{ config.version }})
+
+.. tip::
+
+   If you're relying on an Lmod Python module locally, you may wish to customize
+   the resulting Omniperf modulefile post-installation to include extra
+   module dependencies.
+
+Execution without modulefiles
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To use Omniperf without the companion modulefile, update your ``PATH``
+settings to enable access to the command line binary. If you installed Python
+dependencies in a shared location, also update your ``PYTHONPATH``
+configuration.
+
+.. datatemplate:nodata::
+
+   .. code-block:: shell
+
+      export PATH=$INSTALL_DIR/{{ config.version }}/bin:$PATH
+      export PYTHONPATH=$INSTALL_DIR/python-libs
+
+.. _core-install-package:
+
+Install via package manager
+---------------------------
+
+Once ROCm (minimum version 6.2.0) is installed, you can install Omniperf using
+your operating system's native package manager using the following commands.
+See :doc:`rocm-install-on-linux:index` for guidance on installing the ROCm
+software stack.
+
+.. tab-set::
+
+   .. tab-item:: Ubuntu
+
+      .. code-block:: shell
+
+         $ sudo apt install omniperf
+         $ pip install -r /opt/rocm/libexec/omniperf/requirements.txt
+
+   .. tab-item:: Red Hat Enterprise Linux
+
+      .. code-block:: shell
+
+         $ sudo dnf install omniperf
+         $ pip install -r /opt/rocm/libexec/omniperf/requirements.txt
+
+   .. tab-item:: SUSE Linux Enterprise Server
+
+      .. code-block:: shell
+
+         $ sudo zypper install omniperf
+         $ pip install -r /opt/rocm/libexec/omniperf/requirements.txt
+
+.. _core-install-rocprof-var:
+
+ROCProfiler
+-----------
+
+Omniperf relies on :doc:`ROCProfiler <rocprofiler:index>`'s ``rocprof`` binary
+during the profiling process. Normally, the path to this binary is detected
+automatically, but you can override the path by setting the optional
+``ROCPROF`` environment variable.
+
diff --git a/projects/rocprofiler-compute/docs/install/grafana-setup.rst b/projects/rocprofiler-compute/docs/install/grafana-setup.rst
new file mode 100644
index 0000000000..a7486d286d
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/install/grafana-setup.rst
@@ -0,0 +1,209 @@
+.. meta::
+   :description: Omniperf Grafana server installation and deployment
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              install, deploy, Grafana, server, configuration, GUI
+
+****************************************
+Setting up a Grafana server for Omniperf
+****************************************
+
+A Grafana server is *not required* to profile or analyze performance data
+from the CLI. It's a supplementary mechanism to help you import performance
+data and examine it in a detailed
+`Grafana <https://github.com/grafana/grafana>`_ dashboard GUI.
+
+Learn about installing and configuring the main Omniperf tool in
+:ref:`core-install`.
+
+Setting up a Grafana instance for Omniperf requires the following basic software
+dependencies.
+
+* `Docker Engine <https://docs.docker.com/engine/install/>`_
+
+The recommended process for enabling the server-side of Omniperf is to use the
+provided ``Dockerfile`` to build the Grafana and MongoDB instance.
+
+.. _grafana-mongodb-setup:
+
+Set up Grafana and MongoDB
+==========================
+
+Once you've decided where to host the Grafana and MongoDB instance, complete the
+following setup instructions.
+
+Install MongoDB utilities
+-------------------------
+
+Omniperf uses the
+`mongoimport <https://www.mongodb.com/docs/database-tools/mongoimport/>`_
+utility to upload data to your Grafana instance's backend database.
+
+Use the following commands to install MongoDB utilities for Ubuntu 20.04.
+
+.. code-block:: bash
+
+   $ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb
+   $ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb
+
+.. note::
+
+   Find installation instructions for other distributions in
+   `MongoDB Database Tools Downloads <https://www.mongodb.com/download-center/database-tools/releases/archive>`_.
+
+.. _grafana-persistent-storage-setup:
+
+Set up persistent storage
+-------------------------
+
+Bind MongoDB to a directory on the host OS to create a local backup in case of a
+crash or reset. This is called *creating a persistent volume*.
+
+.. code-block:: bash
+
+   $ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/
+   $ sudo mkdir -p grafana-storage mongodb
+   $ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/grafana-storage --opt o=bind grafana-storage
+   $ sudo docker volume create --driver local --opt type=none --opt device=/usr/local/persist/mongodb --opt o=bind grafana-mongo-db
+
+.. _grafana-docker-container:
+
+Build and launch the Docker container
+-------------------------------------
+
+You're now ready to build your ``Dockerfile``. Navigate to your Omniperf install
+directory to begin.
+
+.. code-block:: bash
+
+   $ cd grafana
+   $ sudo docker-compose build
+   $ sudo docker-compose up -d
+
+The TCP ports for Grafana (``4000``) and MongoDB (``27017``) in the Docker
+container are mapped to ``14000`` and ``27018``, respectively, on the host side.
+
+.. tip::
+
+   In the event that either your Grafana or MongoDB instance crashes fatally,
+   just restart the server. Navigate to your install directory and run:
+
+   .. code-block::
+
+      $ sudo docker-compose down
+      $ sudo docker-compose up -d
+
+.. _grafana-dashboard-setup:
+
+Set up the Grafana dashboard
+----------------------------
+
+Once you've launched your Docker container you should be able to reach Grafana
+at ``http://<host-ip>:14000``. The default login credentials for your first-time
+Grafana setup are:
+
+* **Username**: ``admin``
+* **Password**: ``admin``
+
+.. figure:: ../data/install/grafana_welcome.png
+   :align: center
+   :alt: Grafana dashboard welcome screen
+   :width: 800
+
+   Grafana's welcome screen.
+
+.. _grafana-datasource-setup:
+
+Configure the MongoDB data source
+---------------------------------
+
+You must configure your MongoDB data source in Grafana before first-time use.
+Navigate to Grafana's **Configuration** page to add the "Omniperf Data"
+connection.
+
+.. figure:: ../data/install/datasource_config.jpg
+   :align: center
+   :alt: Grafana data source configuration
+   :width: 800
+
+   Grafana's Configuration page.
+
+Configure the following fields in the data source settings.
+
+.. list-table::
+   :stub-columns: 1
+
+   * - HTTP URL
+     - ``http://localhost:3333``
+
+   * - MongoDB URL
+     - ``mongodb://temp:temp123@<host-ip>:27018/admin?authSource=admin``
+
+   * - Database Name
+     - ``admin``
+
+After configuring these fields, click **Save & test** to make sure your
+connection is successful.
+
+.. figure:: ../data/install/datasource_settings.jpg
+   :align: center
+   :alt: Grafana data source settings
+   :width: 800
+
+   Grafana data source settings.
+
+.. note::
+
+   To avoid potential DNS issues, you might need to use the actual IP address
+   for the host node in the MongoDB URL.
+
+.. _grafana-import-dashboard-file:
+
+Import the Omniperf dashboard file
+----------------------------------
+
+From the **Create** → **Import** page, upload the dashboard file,
+``/dashboards/Omniperf_v{__VERSION__}_pub.json`` from the
+:doc:`Omniperf tarball <core-install>`.
+
+Edit both the dashboard **Name** and the **Unique identifier (UID)** fields to
+uniquely identify the dashboard. Click **Import** to complete the process.
+
+.. figure:: ../data/install/import_dashboard.png
+   :align: center
+   :alt: Grafana's import dashboard
+   :width: 800
+
+   Grafana's Import dashboard.
+
+.. _grafana-select-workload:
+
+Select and load the Omniperf workload
+-------------------------------------
+
+Once you have imported a dashboard you're ready to begin. Start by browsing
+available dashboards and selecting the dashboard you have just imported.
+
+.. figure:: ../data/install/opening_dashboard.png
+   :align: center
+   :alt: Opening your Omniperf dashboard in Grafana
+   :width: 800
+
+   Opening your Omniperf profiling dashboard in Grafana.
+
+Remember that you need to upload workload data to the MongoDB backend before
+analyzing in your Grafana interface. See a detailed example of this in 
+:ref:`grafana-gui-import`.
+
+After a workload has been successfully uploaded, you should be able to select it
+from the workload dropdown located at the top of your Grafana dashboard.
+
+.. figure:: ../data/install/grafana_workload_selection.png
+   :align: center
+   :alt: Omniperf workload selection in Grafana
+   :width: 800
+
+   Selecting your Omniperf workload in Grafana.
+
+For more information on how to use the Grafana interface for analysis see
+:doc:`/how-to/analyze/grafana-gui`.
+
diff --git a/projects/rocprofiler-compute/docs/license.rst b/projects/rocprofiler-compute/docs/license.rst
new file mode 100644
index 0000000000..c423ed34f1
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/license.rst
@@ -0,0 +1,10 @@
+.. meta::
+   :description: Omniperf license
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              license
+
+*******
+License
+*******
+
+.. include:: ../LICENSE
diff --git a/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst b/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst
new file mode 100644
index 0000000000..b93c720324
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst
@@ -0,0 +1,36 @@
+.. meta::
+   :description: Omniperf support: compatible accelerators and GPUs
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD, GPU
+
+***********************
+Compatible accelerators
+***********************
+
+The following table lists SoCs (System on Chip) tested for compatibility with
+Omniperf. See :doc:`rocm:reference/gpu-arch-specs` for full AMD accelerator and
+GPU specifications.
+
+.. _def-soc:
+
+.. note::
+
+   In Omniperf documentation, the term System on Chip (SoC) refers to a
+   particular family of AMD accelerators.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Platform
+      - Status
+
+    * - AMD Instinct™ MI300
+      - Supported ✅
+
+    * - AMD Instinct MI200
+      - Supported ✅
+
+    * - AMD Instinct MI100
+      - Supported ✅
+
+    * - AMD Instinct MI50, MI60 (Vega 20)
+      - No support ❌
diff --git a/projects/rocprofiler-compute/docs/reference/faq.rst b/projects/rocprofiler-compute/docs/reference/faq.rst
new file mode 100644
index 0000000000..3cbbe778fc
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/reference/faq.rst
@@ -0,0 +1,85 @@
+.. meta::
+    :description: Omniperf FAQ and troubleshooting
+    :keywords: Omniperf, FAQ, troubleshooting, ROCm, profiler, tool, Instinct,
+               accelerator, AMD, SSH, error, version, workaround, help
+
+***
+FAQ
+***
+
+Frequently asked questions and troubleshooting tips.
+
+How do I export profiling data I have already generated using Omniperf?
+=======================================================================
+
+To interact with the Grafana GUI, you must sync data with the MongoDB
+backend. You can do this using :ref:`database <modes-database>` mode.
+
+Pass in the directory of your desired workload as follows.
+
+.. code-block:: shell
+
+    $ omniperf database --import -w <path-to-results> -H <hostname> -u <username> -t <team-name>
+
+python ast error: 'Constant' object has no attribute 'kind'
+===========================================================
+
+This error arises from a bug in the default ``astunparse 1.6.3`` with
+``python 3.8``. The error doesn't seem to occur with Python 3.7 or 3.9.
+
+Workaround:
+
+.. code-block:: shell
+
+   $ pip3 uninstall astunparse
+   $ pip3 install astunparse
+
+tabulate doesn't print properly
+===============================
+
+To get around this issue, set the following environment variables to update your
+locale settings.
+
+.. code-block:: shell
+
+   $ export LC_ALL=C.UTF-8
+   $ export LANG=C.UTF-8
+
+How can I SSH tunnel in MobaXterm?
+==================================
+
+1. Open MobaXterm.
+2. In the top ribbon, select **Tunneling** to access tunneling options.
+
+   .. image:: ../data/faq/tunnel_demo1.png
+      :align: center
+      :alt: MobaXterm Tunnel button
+      :width: 800
+
+   This pop-up should appear.
+
+   .. image:: ../data/faq/tunnel_demo2.png
+      :align: center
+      :alt: MobaXterm pop-up
+      :width: 800
+
+3. Select **New SSH tunnel**.
+
+   .. image:: ../data/faq/tunnel_demo3.png
+      :align: center
+      :alt: MobaXterm pop-up
+      :width: 800
+
+4. Configure the SSH tunnel.
+
+   Local clients
+     * ``<Forwarded port>``: ``[PORT]``
+
+   Remote server
+     * ``<Remote server>``: ``localhost``
+     * ``<Remote port>``: ``[PORT]``
+
+   SSH server
+     * ``<SSH server>``: *name of the server to connect to*
+     * ``<SSH login>``: *username to log in to the server*
+     * ``<SSH port>``: ``22``
diff --git a/projects/rocprofiler-compute/docs/sphinx/_toc.yml.in b/projects/rocprofiler-compute/docs/sphinx/_toc.yml.in
new file mode 100644
index 0000000000..eb863b7a39
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/sphinx/_toc.yml.in
@@ -0,0 +1,60 @@
+# Anywhere {branch} is used, the branch name will be substituted.
+# These comments will also be removed.
+defaults:
+  numbered: False
+  maxdepth: 6
+root: index
+subtrees:
+  - entries:
+    - file: what-is-omniperf.rst
+
+  - caption: Install
+    entries:
+    - file: install/core-install.rst
+    - file: install/grafana-setup.rst
+      title: Grafana server for Omniperf
+
+  - caption: How to
+    entries:
+    - file: how-to/use.rst
+    - file: how-to/profile/mode.rst
+    - file: how-to/analyze/mode.rst
+      entries:
+      - file: how-to/analyze/cli.rst
+      - file: how-to/analyze/grafana-gui.rst
+      - file: how-to/analyze/standalone-gui.rst
+
+  - caption: Conceptual
+    entries:
+    - file: conceptual/performance-model.rst
+      entries:
+      - file: conceptual/compute-unit.rst
+        title: Compute unit
+        entries:
+        - file: conceptual/pipeline-descriptions.rst
+        - file: conceptual/pipeline-metrics.rst
+        - file: conceptual/local-data-share.rst
+          title: Local data share
+        - file: conceptual/vector-l1-cache.rst
+          title: Vector L1 cache
+      - file: conceptual/l2-cache.rst
+        title: L2 cache
+      - file: conceptual/shader-engine.rst
+        title: Shader engine
+      - file: conceptual/command-processor.rst
+        title: Command processor
+      - file: conceptual/system-speed-of-light.rst
+        title: System Speed-of-Light
+      - file: conceptual/references.rst
+    - file: conceptual/definitions.rst
+
+  - caption: Tutorials
+    entries:
+    - file: tutorial/profiling-by-example.rst
+    - file: tutorial/learning-resources.rst
+
+  - caption: Reference
+    entries:
+    - file: reference/compatible-accelerators.rst
+    - file: reference/faq.rst
+    - file: license.rst
diff --git a/projects/rocprofiler-compute/docs/sphinx/requirements.in b/projects/rocprofiler-compute/docs/sphinx/requirements.in
new file mode 100644
index 0000000000..e503806ca1
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/sphinx/requirements.in
@@ -0,0 +1,2 @@
+rocm-docs-core==1.6.1
+sphinxcontrib.datatemplates==0.11.0
diff --git a/projects/rocprofiler-compute/docs/sphinx/requirements.txt b/projects/rocprofiler-compute/docs/sphinx/requirements.txt
new file mode 100644
index 0000000000..82d64eb291
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/sphinx/requirements.txt
@@ -0,0 +1,156 @@
+#
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
+#
+#    pip-compile requirements.in
+#
+accessible-pygments==0.0.5
+    # via pydata-sphinx-theme
+alabaster==0.7.16
+    # via sphinx
+babel==2.15.0
+    # via
+    #   pydata-sphinx-theme
+    #   sphinx
+beautifulsoup4==4.12.3
+    # via pydata-sphinx-theme
+breathe==4.35.0
+    # via rocm-docs-core
+certifi==2024.7.4
+    # via requests
+cffi==1.16.0
+    # via
+    #   cryptography
+    #   pynacl
+charset-normalizer==3.3.2
+    # via requests
+click==8.1.7
+    # via sphinx-external-toc
+cryptography==43.0.0
+    # via pyjwt
+defusedxml==0.7.1
+    # via sphinxcontrib-datatemplates
+deprecated==1.2.14
+    # via pygithub
+docutils==0.21.2
+    # via
+    #   breathe
+    #   myst-parser
+    #   pydata-sphinx-theme
+    #   sphinx
+fastjsonschema==2.20.0
+    # via rocm-docs-core
+gitdb==4.0.11
+    # via gitpython
+gitpython==3.1.43
+    # via rocm-docs-core
+idna==3.7
+    # via requests
+imagesize==1.4.1
+    # via sphinx
+jinja2==3.1.4
+    # via
+    #   myst-parser
+    #   sphinx
+markdown-it-py==3.0.0
+    # via
+    #   mdit-py-plugins
+    #   myst-parser
+markupsafe==2.1.5
+    # via jinja2
+mdit-py-plugins==0.4.1
+    # via myst-parser
+mdurl==0.1.2
+    # via markdown-it-py
+myst-parser==3.0.1
+    # via rocm-docs-core
+packaging==24.1
+    # via
+    #   pydata-sphinx-theme
+    #   sphinx
+pycparser==2.22
+    # via cffi
+pydata-sphinx-theme==0.15.4
+    # via
+    #   rocm-docs-core
+    #   sphinx-book-theme
+pygithub==2.3.0
+    # via rocm-docs-core
+pygments==2.18.0
+    # via
+    #   accessible-pygments
+    #   pydata-sphinx-theme
+    #   sphinx
+pyjwt[crypto]==2.8.0
+    # via pygithub
+pynacl==1.5.0
+    # via pygithub
+pyyaml==6.0.1
+    # via
+    #   myst-parser
+    #   rocm-docs-core
+    #   sphinx-external-toc
+    #   sphinxcontrib-datatemplates
+requests==2.32.3
+    # via
+    #   pygithub
+    #   sphinx
+rocm-docs-core==1.6.1
+    # via -r requirements.in
+smmap==5.0.1
+    # via gitdb
+snowballstemmer==2.2.0
+    # via sphinx
+soupsieve==2.5
+    # via beautifulsoup4
+sphinx==7.4.7
+    # via
+    #   breathe
+    #   myst-parser
+    #   pydata-sphinx-theme
+    #   rocm-docs-core
+    #   sphinx-book-theme
+    #   sphinx-copybutton
+    #   sphinx-design
+    #   sphinx-external-toc
+    #   sphinx-notfound-page
+    #   sphinxcontrib-datatemplates
+    #   sphinxcontrib-runcmd
+sphinx-book-theme==1.1.3
+    # via rocm-docs-core
+sphinx-copybutton==0.5.2
+    # via rocm-docs-core
+sphinx-design==0.6.0
+    # via rocm-docs-core
+sphinx-external-toc==1.0.1
+    # via rocm-docs-core
+sphinx-notfound-page==1.0.2
+    # via rocm-docs-core
+sphinxcontrib-applehelp==1.0.8
+    # via sphinx
+sphinxcontrib-datatemplates==0.11.0
+    # via -r requirements.in
+sphinxcontrib-devhelp==1.0.6
+    # via sphinx
+sphinxcontrib-htmlhelp==2.0.6
+    # via sphinx
+sphinxcontrib-jsmath==1.0.1
+    # via sphinx
+sphinxcontrib-qthelp==1.0.8
+    # via sphinx
+sphinxcontrib-runcmd==0.2.0
+    # via sphinxcontrib-datatemplates
+sphinxcontrib-serializinghtml==1.1.10
+    # via sphinx
+tomli==2.0.1
+    # via sphinx
+typing-extensions==4.12.2
+    # via
+    #   pydata-sphinx-theme
+    #   pygithub
+urllib3==2.2.2
+    # via
+    #   pygithub
+    #   requests
+wrapt==1.16.0
+    # via deprecated
diff --git a/projects/rocprofiler-compute/docs/sphinx/static/css/o_custom.css b/projects/rocprofiler-compute/docs/sphinx/static/css/o_custom.css
new file mode 100644
index 0000000000..a6cbe5718f
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/sphinx/static/css/o_custom.css
@@ -0,0 +1,30 @@
+:root {
+    --amd-teal-500: #00C2DE;
+    --amd-teal-750: #00788E;
+}
+
+/* Override PyData Sphinx Theme default colors */
+html[data-theme='light'] {
+    --pst-color-primary: var(--amd-teal-750);
+    --pst-color-primary-bg: var(--amd-teal-500);
+    --pst-color-table-row-hover-bg: #E2E8F0;
+}
+
+html[data-theme='dark'] {
+    --pst-color-primary: var(--amd-teal-500);
+    --pst-color-primary-bg: var(--amd-teal-750);
+    --pst-color-table-row-hover-bg: #1E293B;
+}
+
+html[data-theme='light'],
+html[data-theme='dark'] {
+    --pst-color-link: var(--pst-color-primary);
+}
+
+a svg {
+  color: var(--pst-color-text-base);
+}
+
+a svg:hover {
+  color: var(--pst-color-link-hover);
+}
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/infinity-fabric-transactions.rst b/projects/rocprofiler-compute/docs/tutorial/includes/infinity-fabric-transactions.rst
new file mode 100644
index 0000000000..b60355bf7b
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/includes/infinity-fabric-transactions.rst
@@ -0,0 +1,675 @@
+.. _infinity-fabric-example:
+
+Infinity Fabric transactions
+============================
+
+For this example, consider the
+:dev-sample:`Infinity Fabric™ sample <fabric.hip>` distributed as a part of
+Omniperf.
+
+The following code snippet launches a simple read-only kernel.
+
+.. code-block:: cpp
+
+   // the main streaming kernel
+   __global__ void kernel(int* x, size_t N, int zero) {
+     int sum = 0;
+     const size_t offset_start = threadIdx.x + blockIdx.x * blockDim.x;
+     for (int i = 0; i < 10; ++i) {
+       for (size_t offset = offset_start; offset < N; offset += blockDim.x * gridDim.x) {
+         sum += x[offset];
+       }
+     }
+     if (sum != 0) {
+       x[offset_start] = sum;
+     }
+   }
+
+This happens twice -- once as a warm-up and once for analysis. Note that the
+buffer ``x`` is initialized to all zeros via a call to ``hipMemcpy`` on the
+host before the kernel is ever launched. Therefore, the following conditional
+is identically false -- and thus we expect no writes.
+
+.. code-block:: cpp
+
+   if (sum != 0) { ...
+
+.. note::
+
+   The actual sample included with Omniperf also includes the ability to select
+   different operation types (such as atomics, writes). This abbreviated version
+   is presented here for reference only.
+
+Finally, this sample code lets the user control the
+:ref:`granularity of an allocation <memory-type>`, the owner of an allocation
+(local HBM, CPU DRAM or remote HBM), and the size of an allocation (the default
+is :math:`\sim4`\ GiB) via command line arguments. In doing so, we can explore
+the impact of these parameters on the L2-Fabric metrics reported by Omniperf to
+further understand their meaning.
+
+.. note::
+
+   All results in this section were generated on a node of Infinity
+   Fabric connected MI250 accelerators using ROCm version 5.6.0, and Omniperf
+   version 2.0.0. Although results may vary with ROCm versions and accelerator
+   connectivity, we expect the lessons learned here to be broadly applicable.
+
+.. _infinity-fabric-ex1:
+
+Experiment 1:  Coarse-grained, accelerator-local HBM reads
+-----------------------------------------------------------
+
+In our first experiment, we consider the simplest possible case, a
+``hipMalloc``\ ’d buffer that is local to our current accelerator:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n coarse_grained_local --no-roof -- ./fabric -t 1 -o 0
+   Using:
+     mtype:CoarseGrained
+     mowner:Device
+     mspace:Global
+     mop:Read
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/coarse_grained_local/mi200 -b 17.2.0 17.2.1 17.2.2 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4 -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕
+   │ Index   │ Metric              │            Avg │            Min │            Max │ Unit             │
+   ╞═════════╪═════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡
+   │ 17.2.0  │ L2-Fabric Read BW   │ 42947428672.00 │ 42947428672.00 │ 42947428672.00 │ Bytes per kernel │
+   ├─────────┼─────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.1  │ HBM Read Traffic    │         100.00 │         100.00 │         100.00 │ Pct              │
+   ├─────────┼─────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.2  │ Remote Read Traffic │           0.00 │           0.00 │           0.00 │ Pct              │
+   ╘═════════╧═════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                        │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.0  │ Read - PCIe Stall             │ PCIe Stall             │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.1  │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.2  │ Read - HBM Stall              │ HBM Stall              │ Read          │  0.07 │  0.07 │  0.07 │ Pct    │
+   ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕
+   │ Index   │ Metric          │          Avg │          Min │          Max │ Unit           │
+   ╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡
+   │ 17.5.0  │ Read (32B)      │         0.00 │         0.00 │         0.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.1  │ Read (Uncached) │      1450.00 │      1450.00 │      1450.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.2  │ Read (64B)      │ 671053573.00 │ 671053573.00 │ 671053573.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.3  │ HBM Read        │ 671053565.00 │ 671053565.00 │ 671053565.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.4  │ Remote Read     │         8.00 │         8.00 │         8.00 │ Req per kernel │
+   ╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛
+
+Here, you can make the following observations.
+
+- The vast majority of L2-Fabric requests (>99%) are 64B
+  read requests (**17.5.2**).
+
+- Nearly 100% of the read requests (**17.2.1**) are homed in on the
+  accelerator-local HBM (**17.5.3**), while some small fraction of these reads are
+  routed to a “remote” device (**17.5.4**).
+
+- These drive a :math:`\sim40`\ GiB per kernel read-bandwidth (**17.2.0**).
+
+In addition, we see a small number of :ref:`uncached <memory-type>` reads
+(**17.5.1**). These correspond to things like:
+
+* The assembly code to execute the kernel
+
+* Kernel arguments
+
+* Coordinate parameters (such as ``blockDim.z``) that were not initialized by the
+  hardware, etc. and may account for some of our "remote" read requests
+  (**17.5.4**), for example, reading from CPU DRAM
+
+The above list is not exhaustive, nor are all of these guaranteed to be
+"uncached" – the exact implementation depends on the accelerator and
+ROCm versions used. These read requests could be interrogated further in
+the :ref:`Scalar L1 Data Cache <desc-sl1d>` and
+:ref:`Instruction Cache <desc-l1i>` metric sections.
+
+.. note::
+
+   The Traffic metrics in Sec **17.2** are presented as a percentage of the total
+   number of requests. For example, "HBM Read Traffic" is the percent of read requests
+   (**17.5.0** - **17.5.2**) that were directed to the accelerator's local HBM (**17.5.3**).
+
+.. _infinity-fabric-ex2:
+
+Experiment 2: Fine-grained, accelerator-local HBM reads
+---------------------------------------------------------
+
+In this experiment, we change the :ref:`granularity <memory-type>` of our
+device-allocation to be fine-grained device memory, local to the current
+accelerator. Our code uses the ``hipExtMallocWithFlags`` API with the
+``hipDeviceMallocFinegrained`` flag to accomplish this.
+
+.. note::
+
+   On some systems (such as those with only PCIe® connected accelerators), you need
+   to set the environment variable ``HSA_FORCE_FINE_GRAIN_PCIE=1`` to enable
+   this memory type.
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n fine_grained_local --no-roof -- ./fabric -t 0 -o 0
+   Using:
+     mtype:FineGrained
+     mowner:Device
+     mspace:Global
+     mop:Read
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/fine_grained_local/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4  -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕
+   │ Index   │ Metric                │            Avg │            Min │            Max │ Unit             │
+   ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡
+   │ 17.2.0  │ L2-Fabric Read BW     │ 42948661824.00 │ 42948661824.00 │ 42948661824.00 │ Bytes per kernel │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.1  │ HBM Read Traffic      │         100.00 │         100.00 │         100.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.2  │ Remote Read Traffic   │           0.00 │           0.00 │           0.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.3  │ Uncached Read Traffic │           0.00 │           0.00 │           0.00 │ Pct              │
+   ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                        │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.0  │ Read - PCIe Stall             │ PCIe Stall             │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.1  │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.2  │ Read - HBM Stall              │ HBM Stall              │ Read          │  0.07 │  0.07 │  0.07 │ Pct    │
+   ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕
+   │ Index   │ Metric          │          Avg │          Min │          Max │ Unit           │
+   ╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡
+   │ 17.5.0  │ Read (32B)      │         0.00 │         0.00 │         0.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.1  │ Read (Uncached) │      1334.00 │      1334.00 │      1334.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.2  │ Read (64B)      │ 671072841.00 │ 671072841.00 │ 671072841.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.3  │ HBM Read        │ 671072835.00 │ 671072835.00 │ 671072835.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.4  │ Remote Read     │         6.00 │         6.00 │         6.00 │ Req per kernel │
+   ╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛
+
+Comparing with our :ref:`previous example <infinity-fabric-ex1>`, we see a
+relatively similar result, namely:
+
+- The vast majority of L2-Fabric requests are 64B read requests (**17.5.2**)
+
+- Nearly all these read requests are directed to the accelerator-local HBM (**17.2.1**)
+
+In addition, we now see a small percentage of HBM Read Stalls (**17.4.2**),
+as streaming fine-grained memory is putting more stress on Infinity
+Fabric.
+
+.. note::
+
+   The stalls in Sec **17.4** are presented as a percentage of the total number
+   of active L2 cycles, summed over :doc:`all L2 channels </conceptual/l2-cache>`.
+
+.. _infinity-fabric-ex3:
+
+Experiment 3: Fine-grained, remote-accelerator HBM reads
+----------------------------------------------------------
+
+In this experiment, we move our :ref:`fine-grained <memory-type>` allocation to
+be owned by a remote accelerator. We accomplish this by first changing
+the HIP device using, for instance, the ``hipSetDevice(1)`` API, then allocating
+fine-grained memory (as described :ref:`previously <infinity-fabric-ex2>`), and
+finally resetting the device back to the default, for instance,
+``hipSetDevice(0)``.
+
+Although we have not changed our code significantly, we do see a
+substantial change in the L2-Fabric metrics:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n fine_grained_remote --no-roof -- ./fabric -t 0 -o 2
+   Using:
+     mtype:FineGrained
+     mowner:Remote
+     mspace:Global
+     mop:Read
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/fine_grained_remote/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4  -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕
+   │ Index   │ Metric                │            Avg │            Min │            Max │ Unit             │
+   ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡
+   │ 17.2.0  │ L2-Fabric Read BW     │ 42949692736.00 │ 42949692736.00 │ 42949692736.00 │ Bytes per kernel │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.1  │ HBM Read Traffic      │           0.00 │           0.00 │           0.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.2  │ Remote Read Traffic   │         100.00 │         100.00 │         100.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.3  │ Uncached Read Traffic │         200.00 │         200.00 │         200.00 │ Pct              │
+   ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                        │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.0  │ Read - PCIe Stall             │ PCIe Stall             │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.1  │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read          │ 17.85 │ 17.85 │ 17.85 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.2  │ Read - HBM Stall              │ HBM Stall              │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════╤═══════════════╤═══════════════╤═══════════════╤════════════════╕
+   │ Index   │ Metric          │           Avg │           Min │           Max │ Unit           │
+   ╞═════════╪═════════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╡
+   │ 17.5.0  │ Read (32B)      │          0.00 │          0.00 │          0.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.1  │ Read (Uncached) │ 1342177894.00 │ 1342177894.00 │ 1342177894.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.2  │ Read (64B)      │  671088949.00 │  671088949.00 │  671088949.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.3  │ HBM Read        │        307.00 │        307.00 │        307.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.4  │ Remote Read     │  671088642.00 │  671088642.00 │  671088642.00 │ Req per kernel │
+   ╘═════════╧═════════════════╧═══════════════╧═══════════════╧═══════════════╧════════════════╛
+
+First, we see that while we still observe approximately the same number
+of 64B Read Requests (**17.5.2**), we now see an even larger number of
+Uncached Read Requests (**17.5.1**). Some simple division reveals:
+
+.. math::
+
+   1342177894.00 / 671088949.00 \approx 2
+
+That is, each 64B Read Request is *also* counted as two Uncached Read
+Requests, as reflected in the :ref:`request-flow diagram <l2-request-flow>`.
+This is also why the Uncached Read Traffic metric (**17.2.3**) is at the
+counter-intuitive value of 200%!
+
+In addition, observe that:
+
+- We no longer see any significant number of HBM Read Requests (**17.2.1**,
+  **17.5.3**), nor HBM Read Stalls (**17.4.2**), but instead,
+
+- we see that almost all of these requests are considered “remote”
+  (**17.2.2**, **17.5.4**), meaning they are being routed to another
+  accelerator, or the CPU — in this case HIP Device 1 — and,
+
+- we see a significantly larger percentage of AMD Infinity Fabric Read Stalls
+  (**17.4.1**) as compared to the HBM Read Stalls in the
+  :ref:`previous example <infinity-fabric-ex2>`.
+
+These stalls correspond to reads that are going out over the AMD
+Infinity Fabric connection to another MI250 accelerator. In
+addition, because these are crossing between accelerators, we expect
+significantly lower achievable bandwidths as compared to the local
+accelerator’s HBM – this is reflected (indirectly) in the magnitude of
+the stall metric (**17.4.1**). Finally, we note that if our system contained
+only PCIe connected accelerators, these observations would differ.
+
+.. _infinity-fabric-ex4:
+
+Experiment 4: Fine-grained, CPU-DRAM reads
+--------------------------------------------
+
+In this experiment, we move our :ref:`fine-grained <memory-type>` allocation to
+be owned by the CPU’s DRAM. We accomplish this by allocating host-pinned
+fine-grained memory using the ``hipHostMalloc`` API:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n fine_grained_host --no-roof -- ./fabric -t 0 -o 1
+   Using:
+     mtype:FineGrained
+     mowner:Host
+     mspace:Global
+     mop:Read
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/fine_grained_host/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4  -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕
+   │ Index   │ Metric                │            Avg │            Min │            Max │ Unit             │
+   ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡
+   │ 17.2.0  │ L2-Fabric Read BW     │ 42949691264.00 │ 42949691264.00 │ 42949691264.00 │ Bytes per kernel │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.1  │ HBM Read Traffic      │           0.00 │           0.00 │           0.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.2  │ Remote Read Traffic   │         100.00 │         100.00 │         100.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.3  │ Uncached Read Traffic │         200.00 │         200.00 │         200.00 │ Pct              │
+   ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                        │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.0  │ Read - PCIe Stall             │ PCIe Stall             │ Read          │ 91.29 │ 91.29 │ 91.29 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.1  │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.2  │ Read - HBM Stall              │ HBM Stall              │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════╤═══════════════╤═══════════════╤═══════════════╤════════════════╕
+   │ Index   │ Metric          │           Avg │           Min │           Max │ Unit           │
+   ╞═════════╪═════════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╡
+   │ 17.5.0  │ Read (32B)      │          0.00 │          0.00 │          0.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.1  │ Read (Uncached) │ 1342177848.00 │ 1342177848.00 │ 1342177848.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.2  │ Read (64B)      │  671088926.00 │  671088926.00 │  671088926.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.3  │ HBM Read        │        284.00 │        284.00 │        284.00 │ Req per kernel │
+   ├─────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────────┤
+   │ 17.5.4  │ Remote Read     │  671088642.00 │  671088642.00 │  671088642.00 │ Req per kernel │
+   ╘═════════╧═════════════════╧═══════════════╧═══════════════╧═══════════════╧════════════════╛
+
+Here we see *almost* the same results as in the
+:ref:`previous experiment <infinity-fabric-ex3>`, however now as we are crossing
+a PCIe bus to the CPU, we see that the Infinity Fabric Read stalls (**17.4.1**)
+have shifted to be a PCIe stall (**17.4.0**). In addition, as (on this
+system) the PCIe bus has a lower peak bandwidth than the AMD Infinity
+Fabric connection between two accelerators, we once again observe an
+increase in the percentage of stalls on this interface.
+
+.. note::
+
+   Had we performed this same experiment on an
+   `MI250X system <https://www.amd.com/system/files/documents/amd-cdna2-white-paper.pdf>`_,
+   these transactions would again have been marked as Infinity Fabric Read
+   stalls (**17.4.1**), as the CPU is connected to the accelerator via AMD Infinity
+   Fabric.
+
+.. _infinity-fabric-ex5:
+
+Experiment 5: Coarse-grained, CPU-DRAM reads
+----------------------------------------------
+
+In our next fabric experiment, we change our CPU memory allocation to be
+:ref:`coarse-grained <memory-type>`. We accomplish this by passing the
+``hipHostMalloc`` API the ``hipHostMallocNonCoherent`` flag, to mark the
+allocation as coarse-grained:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n coarse_grained_host --no-roof -- ./fabric -t 1 -o 1
+   Using:
+     mtype:CoarseGrained
+     mowner:Host
+     mspace:Global
+     mop:Read
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/coarse_grained_host/mi200 -b 17.2.0 17.2.1 17.2.2 17.2.3 17.4.0 17.4.1 17.4.2 17.5.0 17.5.1 17.5.2 17.5.3 17.5.4  -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═══════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕
+   │ Index   │ Metric                │            Avg │            Min │            Max │ Unit             │
+   ╞═════════╪═══════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡
+   │ 17.2.0  │ L2-Fabric Read BW     │ 42949691264.00 │ 42949691264.00 │ 42949691264.00 │ Bytes per kernel │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.1  │ HBM Read Traffic      │           0.00 │           0.00 │           0.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.2  │ Remote Read Traffic   │         100.00 │         100.00 │         100.00 │ Pct              │
+   ├─────────┼───────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.3  │ Uncached Read Traffic │           0.00 │           0.00 │           0.00 │ Pct              │
+   ╘═════════╧═══════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤═══════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                        │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪═══════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.0  │ Read - PCIe Stall             │ PCIe Stall             │ Read          │ 91.27 │ 91.27 │ 91.27 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.1  │ Read - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼───────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.2  │ Read - HBM Stall              │ HBM Stall              │ Read          │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧═══════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕
+   │ Index   │ Metric          │          Avg │          Min │          Max │ Unit           │
+   ╞═════════╪═════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡
+   │ 17.5.0  │ Read (32B)      │         0.00 │         0.00 │         0.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.1  │ Read (Uncached) │       562.00 │       562.00 │       562.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.2  │ Read (64B)      │ 671088926.00 │ 671088926.00 │ 671088926.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.3  │ HBM Read        │       281.00 │       281.00 │       281.00 │ Req per kernel │
+   ├─────────┼─────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.4  │ Remote Read     │ 671088645.00 │ 671088645.00 │ 671088645.00 │ Req per kernel │
+   ╘═════════╧═════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛
+
+Here we see a similar result to our
+:ref:`previous experiment <infinity-fabric-ex4>`, with one key difference: our
+accesses are no longer marked as Uncached Read requests (**17.2.3, 17.5.1**), but instead
+are 64B read requests (**17.5.2**), as observed in our
+:ref:`Coarse-grained, accelerator-local HBM <infinity-fabric-ex1>` experiment.
+
+.. _infinity-fabric-ex6:
+
+Experiment 6: Fine-grained, CPU-DRAM writes
+--------------------------------------------
+
+Thus far in our exploration of the L2-Fabric interface, we have
+primarily focused on read operations. However, in
+:ref:`our request flow diagram <l2-request-flow>`, we note that writes are
+counted separately. To observe this, we use the ``-p`` flag to trigger write
+operations to fine-grained memory allocated on the host:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n fine_grained_host_writes --no-roof -- ./fabric -t 0 -o 1 -p 1
+   Using:
+     mtype:FineGrained
+     mowner:Host
+     mspace:Global
+     mop:Write
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/fine_grained_host_writes/mi200 -b 17.2.4 17.2.5 17.2.6 17.2.7 17.2.8 17.4.3 17.4.4 17.4.5 17.4.6 17.5.5 17.5.6 17.5.7 17.5.8 17.5.9 17.5.10 -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═══════════════════════════════════╤════════════════╤════════════════╤════════════════╤══════════════════╕
+   │ Index   │ Metric                            │            Avg │            Min │            Max │ Unit             │
+   ╞═════════╪═══════════════════════════════════╪════════════════╪════════════════╪════════════════╪══════════════════╡
+   │ 17.2.4  │ L2-Fabric Write and Atomic BW     │ 42949672960.00 │ 42949672960.00 │ 42949672960.00 │ Bytes per kernel │
+   ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.5  │ HBM Write and Atomic Traffic      │           0.00 │           0.00 │           0.00 │ Pct              │
+   ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.6  │ Remote Write and Atomic Traffic   │         100.00 │         100.00 │         100.00 │ Pct              │
+   ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.7  │ Atomic Traffic                    │           0.00 │           0.00 │           0.00 │ Pct              │
+   ├─────────┼───────────────────────────────────┼────────────────┼────────────────┼────────────────┼──────────────────┤
+   │ 17.2.8  │ Uncached Write and Atomic Traffic │         100.00 │         100.00 │         100.00 │ Pct              │
+   ╘═════════╧═══════════════════════════════════╧════════════════╧════════════════╧════════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤════════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                         │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪════════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.3  │ Write - PCIe Stall             │ PCIe Stall             │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.4  │ Write - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.5  │ Write - HBM Stall              │ HBM Stall              │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.6  │ Write - Credit Starvation      │ Credit Starvation      │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧════════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════════════╤══════════════╤══════════════╤══════════════╤════════════════╕
+   │ Index   │ Metric                  │          Avg │          Min │          Max │ Unit           │
+   ╞═════════╪═════════════════════════╪══════════════╪══════════════╪══════════════╪════════════════╡
+   │ 17.5.5  │ Write (32B)             │         0.00 │         0.00 │         0.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.6  │ Write (Uncached)        │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.7  │ Write (64B)             │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.8  │ HBM Write and Atomic    │         0.00 │         0.00 │         0.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.9  │ Remote Write and Atomic │ 671088640.00 │ 671088640.00 │ 671088640.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼──────────────┼──────────────┼──────────────┼────────────────┤
+   │ 17.5.10 │ Atomic                  │         0.00 │         0.00 │         0.00 │ Req per kernel │
+   ╘═════════╧═════════════════════════╧══════════════╧══════════════╧══════════════╧════════════════╛
+
+Here we notice a few changes in our request pattern:
+
+* As expected, the requests have changed from 64B Reads to 64B Write requests
+  (**17.5.7**),
+
+* these requests are homed in on a “remote” destination (**17.2.6, 17.5.9**), as
+  expected, and
+
+* these are also counted as a single Uncached Write request (**17.5.6**).
+
+In addition, there are rather significant changes in the bandwidth values
+reported:
+
+- The “L2-Fabric Write and Atomic” bandwidth metric (**17.2.4**)
+  reports about 40GiB of data written across Infinity Fabric while
+
+- The “Remote Write and Atomic Traffic” metric (**17.2.6**) indicates that nearly
+  100% of these requests are being directed to a remote destination.
+
+The precise meaning of these metrics is explored in the
+:ref:`subsequent experiment <infinity-fabric-ex7>`.
+
+Finally, we note that we see no write stalls on the PCIe bus
+(**17.4.3**). This is because writes over a PCIe bus `are
+posted <https://members.pcisig.com/wg/PCI-SIG/document/10912>`_,
+that is, they do not require acknowledgement.
+
+.. _infinity-fabric-ex7:
+
+Experiment 7: Fine-grained, CPU-DRAM atomicAdd
+------------------------------------------------
+
+Next, we change our experiment to instead target ``atomicAdd``
+operations to the CPU’s DRAM.
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n fine_grained_host_add --no-roof -- ./fabric -t 0 -o 1 -p 2
+   Using:
+     mtype:FineGrained
+     mowner:Host
+     mspace:Global
+     mop:Add
+     mdata:Unsigned
+     remoteId:-1
+   <...>
+   $ omniperf analyze -p workloads/fine_grained_host_add/mi200 -b 17.2.4 17.2.5 17.2.6 17.2.7 17.2.8 17.4.3 17.4.4 17.4.5 17.4.6 17.5.5 17.5.6 17.5.7 17.5.8 17.5.9 17.5.10 -n per_kernel --dispatch 2
+   <...>
+   17. L2 Cache
+   17.2 L2 - Fabric Transactions
+   ╒═════════╤═══════════════════════════════════╤══════════════╤══════════════╤══════════════╤══════════════════╕
+   │ Index   │ Metric                            │          Avg │          Min │          Max │ Unit             │
+   ╞═════════╪═══════════════════════════════════╪══════════════╪══════════════╪══════════════╪══════════════════╡
+   │ 17.2.4  │ L2-Fabric Write and Atomic BW     │ 429496736.00 │ 429496736.00 │ 429496736.00 │ Bytes per kernel │
+   ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤
+   │ 17.2.5  │ HBM Write and Atomic Traffic      │         0.00 │         0.00 │         0.00 │ Pct              │
+   ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤
+   │ 17.2.6  │ Remote Write and Atomic Traffic   │       100.00 │       100.00 │       100.00 │ Pct              │
+   ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤
+   │ 17.2.7  │ Atomic Traffic                    │       100.00 │       100.00 │       100.00 │ Pct              │
+   ├─────────┼───────────────────────────────────┼──────────────┼──────────────┼──────────────┼──────────────────┤
+   │ 17.2.8  │ Uncached Write and Atomic Traffic │       100.00 │       100.00 │       100.00 │ Pct              │
+   ╘═════════╧═══════════════════════════════════╧══════════════╧══════════════╧══════════════╧══════════════════╛
+   17.4 L2 - Fabric Interface Stalls
+   ╒═════════╤════════════════════════════════╤════════════════════════╤═══════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                         │ Type                   │ Transaction   │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪════════════════════════════════╪════════════════════════╪═══════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 17.4.3  │ Write - PCIe Stall             │ PCIe Stall             │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.4  │ Write - Infinity Fabric™ Stall │ Infinity Fabric™ Stall │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.5  │ Write - HBM Stall              │ HBM Stall              │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────┼────────────────────────┼───────────────┼───────┼───────┼───────┼────────┤
+   │ 17.4.6  │ Write - Credit Starvation      │ Credit Starvation      │ Write         │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧════════════════════════════════╧════════════════════════╧═══════════════╧═══════╧═══════╧═══════╧════════╛
+   17.5 L2 - Fabric Detailed Transaction Breakdown
+   ╒═════════╤═════════════════════════╤═════════════╤═════════════╤═════════════╤════════════════╕
+   │ Index   │ Metric                  │         Avg │         Min │         Max │ Unit           │
+   ╞═════════╪═════════════════════════╪═════════════╪═════════════╪═════════════╪════════════════╡
+   │ 17.5.5  │ Write (32B)             │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤
+   │ 17.5.6  │ Write (Uncached)        │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤
+   │ 17.5.7  │ Write (64B)             │        0.00 │        0.00 │        0.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤
+   │ 17.5.8  │ HBM Write and Atomic    │        0.00 │        0.00 │        0.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤
+   │ 17.5.9  │ Remote Write and Atomic │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │
+   ├─────────┼─────────────────────────┼─────────────┼─────────────┼─────────────┼────────────────┤
+   │ 17.5.10 │ Atomic                  │ 13421773.00 │ 13421773.00 │ 13421773.00 │ Req per kernel │
+   ╘═════════╧═════════════════════════╧═════════════╧═════════════╧═════════════╧════════════════╛
+
+In this case, there is quite a lot to unpack:
+
+- For the first time, the 32B Write requests (**17.5.5**) are heavily used.
+
+- These correspond to Atomic requests (**17.2.7, 17.5.10**), and are counted as
+  Uncached Writes (**17.5.6**).
+
+- The L2-Fabric Write and Atomic bandwidth metric (**17.2.4**) shows about 0.4
+  GiB of traffic. For convenience, the sample reduces the default problem size
+  for this case due to the speed of atomics across a PCIe bus, and finally,
+
+- The traffic is directed to a remote device (**17.2.6, 17.5.9**).
+
+Let's consider what an “atomic” request means in this context. Recall
+that we are discussing memory traffic flowing from the L2 cache, the
+device-wide coherence point on current CDNA accelerators such as the
+MI250, to, for example, the CPU’s DRAM. In this light, we see that these
+requests correspond to *system scope* atomics, and specifically in the
+case of the MI250, to fine-grained memory.
+
+
+.. rubric:: Disclaimer
+
+PCIe® is a registered trademark of PCI-SIG Corporation.
+
+..
+   `Leave as possible future experiment to add
+
+
+   ### Experiment #2 - Non-temporal writes
+
+   If we take the same code (for convenience only) as previously described, we can demonstrate how to achieve 'streaming' writes, as described in the [L2 Cache Access metrics](L2_cache_metrics) section.
+   To see this, we use the Clang built-in [`__builtin_nontemporal_store`](https://clang.llvm.org/docs/LanguageExtensions.html#non-temporal-load-store-builtins), for example
+
+   ```
+   template<typename T>
+   __device__ void store (T* ptr, T val) {
+    __builtin_nontemporal_store(val, ptr);
+   }
+   ```
+
+   On an AMD MI2XX accelerator, for FP32 values this will generate a `global_store_dword` instruction, with the `glc` and `slc` bits set, described in [section 10.1](https://developer.amd.com/wp-content/resources/CDNA2_Shader_ISA_4February2022.pdf) of the CDNA2 ISA guide.`
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/instructions-per-cycle-and-utilizations.rst b/projects/rocprofiler-compute/docs/tutorial/includes/instructions-per-cycle-and-utilizations.rst
new file mode 100644
index 0000000000..dcbf372663
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/includes/instructions-per-cycle-and-utilizations.rst
@@ -0,0 +1,486 @@
+.. _ipc-example:
+
+Instructions-per-cycle and utilizations example
+===============================================
+
+For this example, consider the
+:dev-sample:`instructions-per-cycle (IPC) example <ipc.hip>` included with
+Omniperf.
+
+This example is compiled using ``c++17`` support:
+
+.. code-block:: shell
+
+   $ hipcc -O3 ipc.hip -o ipc -std=c++17
+
+and was run on an MI250 CDNA2 accelerator:
+
+.. code-block:: shell
+
+   $ omniperf profile -n ipc --no-roof -- ./ipc
+
+The results shown in this section are *generally* applicable to CDNA
+accelerators, but may vary between generations and specific products.
+
+.. _ipc-experiment-design-note:
+
+Design note
+-----------
+
+The kernels in this example all execute a specific assembly operation
+``N`` times (1000, by default), for instance the ``vmov`` kernel:
+
+.. code-block:: cpp
+
+   template<int N=1000>
+   __device__ void vmov_op() {
+       int dummy;
+       if constexpr (N >= 1) {
+           asm volatile("v_mov_b32 v0, v1\n" : : "{v31}"(dummy));
+           vmov_op<N - 1>();
+       }
+   }
+
+   template<int N=1000>
+   __global__ void vmov() {
+       vmov_op<N>();
+   }
+
+The kernels are then launched twice, once for a warm-up run, and once
+for measurement.
+
+.. _ipc-valu-utilization:
+
+VALU utilization and IPC
+------------------------
+
+Now we can use our test to measure the achieved instructions-per-cycle
+of various types of instructions. We start with a simple :ref:`VALU <desc-valu>`
+operation, i.e., a ``v_mov_b32`` instruction, e.g.:
+
+.. code-block:: asm
+
+   v_mov_b32 v0, v1
+
+This instruction simply copies the contents from the source register
+(``v1``) to the destination register (``v0``). Investigating this kernel
+with Omniperf, we see:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/ipc/mi200/ --dispatch 7 -b 11.2
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕
+   │    │ KernelName                    │   Count │     Sum(ns) │    Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡
+   │  0 │ void vmov<1000>() [clone .kd] │    1.00 │ 99317423.00 │ 99317423.00 │  99317423.00 │ 100.00 │
+   ╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   11. Compute Units - Compute Pipeline
+   11.2 Pipeline Stats
+   ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕
+   │ Index   │ Metric              │ Avg   │ Min   │ Max   │ Unit         │
+   ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡
+   │ 11.2.0  │ IPC                 │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.1  │ IPC (Issued)        │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.2  │ SALU Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.3  │ VALU Util           │ 99.98 │ 99.98 │ 99.98 │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.4  │ VMEM Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.5  │ Branch Util         │ 0.1   │ 0.1   │ 0.1   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.6  │ VALU Active Threads │ 64.0  │ 64.0  │ 64.0  │ Threads      │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.7  │ MFMA Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.8  │ MFMA Instr Cycles   │       │       │       │ Cycles/instr │
+   ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛
+
+Here we see that:
+
+1. Both the IPC (**11.2.0**) and “Issued” IPC (**11.2.1**) metrics are
+   :math:`\sim 1`
+2. The VALU Utilization metric (**11.2.3**) is also :math:`\sim100\%`, and
+   finally
+3. The VALU Active Threads metric (**11.2.6**) is 64, i.e., the wavefront
+   size on CDNA accelerators, as all threads in the wavefront are
+   active.
+
+We will explore the difference between the IPC (**11.2.0**) and “Issued” IPC
+(**11.2.1**) metrics in the :ref:`next section <issued-ipc>`.
+
+Additionally, we notice a small (0.1%) Branch utilization (**11.2.5**).
+Inspecting the assembly of this kernel shows there are no branch
+operations, however recalling the note in the :ref:`Pipeline
+statistics <pipeline-stats>` section:
+
+ The branch utilization <…> includes time spent in other instruction
+ types (namely: ``s_endpgm``) that are *typically* a very small
+ percentage of the overall kernel execution.
+
+We see that this is coming from execution of the ``s_endpgm``
+instruction at the end of every wavefront.
+
+.. note::
+
+   Technically, the cycle counts used in the denominators of our IPC metrics are
+   actually in units of quad-cycles, a group of 4 consecutive cycles. However, a
+   typical :ref:`VALU <desc-valu>` instruction on CDNA accelerators runs for a
+   single quad-cycle (see :gcn-crash-course:`30`). Therefore, for simplicity, we
+   simply report these metrics as "instructions per cycle".
+
+.. _issued-ipc:
+
+Exploring “issued” IPC via MFMA operations
+------------------------------------------
+
+.. warning::
+
+   The MFMA assembly operations used in this example are inherently not portable
+   to older CDNA architectures.
+
+Unlike the simple quad-cycle ``v_mov_b32`` operation discussed in our
+:ref:`previous example <ipc-valu-utilization>`, some operations take many
+quad-cycles to execute. For example, using the
+`AMD Matrix Instruction Calculator <https://github.com/RadeonOpenCompute/amd_matrix_instruction_calculator#example-of-querying-instruction-information>`_
+we can see that some :ref:`MFMA <desc-mfma>` operations take 64 cycles, e.g.:
+
+.. code-block:: shell
+
+   $ ./matrix_calculator.py --arch CDNA2 --detail-instruction --instruction v_mfma_f32_32x32x8bf16_1k
+   Architecture: CDNA2
+   Instruction: V_MFMA_F32_32X32X8BF16_1K
+   <...>
+       Execution statistics:
+           FLOPs: 16384
+           Execution cycles: 64
+           FLOPs/CU/cycle: 1024
+           Can co-execute with VALU: True
+           VALU co-execution cycles possible: 60
+
+What happens to our IPC when we utilize this ``v_mfma_f32_32x32x8bf16_1k``
+instruction on a CDNA2 accelerator? To find out, we turn to our ``mfma`` kernel
+in the IPC example:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/ipc/mi200/ --dispatch 8 -b 11.2 --decimal 4
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═══════════════════════════════╤═════════╤═════════════════╤═════════════════╤═════════════════╤══════════╕
+   │    │ KernelName                    │   Count │         Sum(ns) │        Mean(ns) │      Median(ns) │      Pct │
+   ╞════╪═══════════════════════════════╪═════════╪═════════════════╪═════════════════╪═════════════════╪══════════╡
+   │  0 │ void mfma<1000>() [clone .kd] │  1.0000 │ 1623167595.0000 │ 1623167595.0000 │ 1623167595.0000 │ 100.0000 │
+   ╘════╧═══════════════════════════════╧═════════╧═════════════════╧═════════════════╧═════════════════╧══════════╛
+
+
+   --------------------------------------------------------------------------------
+   11. Compute Units - Compute Pipeline
+   11.2 Pipeline Stats
+   ╒═════════╤═════════════════════╤═════════╤═════════╤═════════╤══════════════╕
+   │ Index   │ Metric              │     Avg │     Min │     Max │ Unit         │
+   ╞═════════╪═════════════════════╪═════════╪═════════╪═════════╪══════════════╡
+   │ 11.2.0  │ IPC                 │  0.0626 │  0.0626 │  0.0626 │ Instr/cycle  │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.1  │ IPC (Issued)        │  1.0000 │  1.0000 │  1.0000 │ Instr/cycle  │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.2  │ SALU Util           │  0.0000 │  0.0000 │  0.0000 │ Pct          │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.3  │ VALU Util           │  6.2496 │  6.2496 │  6.2496 │ Pct          │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.4  │ VMEM Util           │  0.0000 │  0.0000 │  0.0000 │ Pct          │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.5  │ Branch Util         │  0.0062 │  0.0062 │  0.0062 │ Pct          │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.6  │ VALU Active Threads │ 64.0000 │ 64.0000 │ 64.0000 │ Threads      │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.7  │ MFMA Util           │ 99.9939 │ 99.9939 │ 99.9939 │ Pct          │
+   ├─────────┼─────────────────────┼─────────┼─────────┼─────────┼──────────────┤
+   │ 11.2.8  │ MFMA Instr Cycles   │ 64.0000 │ 64.0000 │ 64.0000 │ Cycles/instr │
+   ╘═════════╧═════════════════════╧═════════╧═════════╧═════════╧══════════════╛
+
+In contrast to our :ref:`VALU IPC example <ipc-valu-utilization>`, we now see
+that the IPC metric (**11.2.0**) and Issued IPC (**11.2.1**) metric differ
+substantially. First, we see the VALU utilization (**11.2.3**) has decreased
+substantially, from nearly 100% to :math:`\sim6.25\%`. We note that this matches
+the ratio of: :math:`((Execution\ cycles) - (VALU\ coexecution\ cycles)) / (Execution\ cycles)`
+reported by the matrix calculator, while the MFMA utilization (**11.2.7**)
+has increased to nearly 100%.
+
+Recall that our ``v_mfma_f32_32x32x8bf16_1k`` instruction takes 64 cycles to
+execute, or 16 quad-cycles, matching our observed MFMA Instruction
+Cycles (**11.2.8**). That is, we have a single instruction executed every 16
+quad-cycles, or :math:`1/16 = 0.0625`, which is almost identical to our IPC
+metric (**11.2.0**). Why then is the Issued IPC metric (**11.2.1**) equal to 1.0?
+
+Instead of simply counting the number of instructions issued and
+dividing by the number of cycles the :doc:`CUs </conceptual/compute-unit>` on
+the accelerator were active (as is done for **11.2.0**), this metric is formulated
+differently, and instead counts the number of
+(non-:ref:`internal <ipc-internal-instructions>`) instructions issued divided
+by the number of (quad-) cycles where the :ref:`scheduler <desc-scheduler>` was
+actively working on issuing instructions. Thus the Issued IPC metric
+(**11.2.1**) gives more of a sense of “what percent of the total number of
+:ref:`scheduler <desc-scheduler>` cycles did a wave schedule an instruction?”
+while the IPC metric (**11.2.0**) indicates the ratio of the number of
+instructions executed over the total
+:ref:`active CU cycles <total-active-cu-cycles>`.
+
+.. warning::
+
+   There are further complications of the Issued IPC metric (**11.2.1**) that make
+   its use more complicated. We will explore that in the
+   :ref:`following section <ipc-internal-instructions>`. For these reasons,
+   Omniperf typically promotes use of the regular IPC metric (**11.2.0**), e.g., in
+   the top-level Speed-of-Light chart.
+
+.. _ipc-internal-instructions:
+
+Internal instructions and IPC
+-----------------------------
+
+Next, we explore the concept of an “internal” instruction. From
+:gcn-crash-course:`29`, we see a few candidates for internal instructions, and
+we choose a ``s_nop`` instruction, which according to the
+:mi200-isa-pdf:`CDNA2 ISA guide <>`:
+
+ Does nothing; it can be repeated in hardware up to eight times.
+
+Here we choose to use the following no-op to make our point:
+
+.. code-block:: asm
+
+   s_nop 0x0
+
+Running this kernel through Omniperf yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/ipc/mi200/ --dispatch 9 -b 11.2
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕
+   │    │ KernelName                    │   Count │     Sum(ns) │    Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡
+   │  0 │ void snop<1000>() [clone .kd] │    1.00 │ 14221851.50 │ 14221851.50 │  14221851.50 │ 100.00 │
+   ╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   11. Compute Units - Compute Pipeline
+   11.2 Pipeline Stats
+   ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕
+   │ Index   │ Metric              │ Avg   │ Min   │ Max   │ Unit         │
+   ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡
+   │ 11.2.0  │ IPC                 │ 6.79  │ 6.79  │ 6.79  │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.1  │ IPC (Issued)        │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.2  │ SALU Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.3  │ VALU Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.4  │ VMEM Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.5  │ Branch Util         │ 0.68  │ 0.68  │ 0.68  │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.6  │ VALU Active Threads │       │       │       │ Threads      │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.7  │ MFMA Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.8  │ MFMA Instr Cycles   │       │       │       │ Cycles/instr │
+   ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛
+
+First, we see that the IPC metric (**11.2.0**) tops our theoretical maximum
+of 5 instructions per cycle (discussed in the :ref:`scheduler <desc-scheduler>`
+section). How can this be?
+
+Recall that :gcn-crash-course:`27` say “no functional unit” for the internal
+instructions. This removes the limitation on the IPC. If we are *only*
+issuing internal instructions, we are not issuing to any execution
+units! However, workloads such as these are almost *entirely* artificial
+(that is, repeatedly issuing internal instructions almost exclusively). In
+practice, a maximum IPC of 5 is expected in almost all cases.
+
+Secondly, note that our “Issued” IPC (**11.2.1**) is still equal to
+one here. Again, this has to do with the details of “internal”
+instructions. Recall in our :ref:`previous example <issued-ipc>` we defined
+this metric as explicitly excluding internal instruction counts. The
+logical question then is, "what *is* this metric counting in our
+``s_nop`` kernel?"
+
+The generated assembly looks something like:
+
+.. code-block:: asm
+
+   ;;#ASMSTART
+   s_nop 0x0
+   ;;#ASMEND
+   ;;#ASMSTART
+   s_nop 0x0
+   ;;#ASMEND
+   ;;<... omitting many more ...>
+   s_endpgm
+   .section        .rodata,#alloc
+   .p2align        6, 0x0
+   .amdhsa_kernel _Z4snopILi1000EEvv
+
+Of particular interest here is the ``s_endpgm`` instruction, of which
+the `CDNA2 ISA
+guide <https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf>`__
+states:
+
+   End of program; terminate wavefront.
+
+This is not on our list of internal instructions from
+:gcn-crash-course:`The AMD GCN Architecture <>`, and is therefore counted as part
+of our Issued IPC (**11.2.1**). Thus, the issued IPC being equal to one here
+indicates that we issued an ``s_endpgm`` instruction every cycle the
+:ref:`scheduler <desc-scheduler>` was active for non-internal instructions, which
+is expected as this was our *only* non-internal instruction.
+
+SALU Utilization
+----------------
+
+Next, we explore a simple :ref:`SALU <desc-salu>` kernel in our on-going IPC and
+utilization example. For this case, we select a simple scalar move
+operation, for instance:
+
+.. code-block:: asm
+
+   s_mov_b32 s0, s1
+
+which, in analogy to our :ref:`v_mov <ipc-valu-utilization>` example, copies the
+contents of the source scalar register (``s1``) to the destination
+scalar register (``s0``). Running this kernel through Omniperf yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/ipc/mi200/ --dispatch 10 -b 11.2
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═══════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕
+   │    │ KernelName                    │   Count │     Sum(ns) │    Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪═══════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡
+   │  0 │ void smov<1000>() [clone .kd] │    1.00 │ 96246554.00 │ 96246554.00 │  96246554.00 │ 100.00 │
+   ╘════╧═══════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   11. Compute Units - Compute Pipeline
+   11.2 Pipeline Stats
+   ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕
+   │ Index   │ Metric              │ Avg   │ Min   │ Max   │ Unit         │
+   ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡
+   │ 11.2.0  │ IPC                 │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.1  │ IPC (Issued)        │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.2  │ SALU Util           │ 99.98 │ 99.98 │ 99.98 │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.3  │ VALU Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.4  │ VMEM Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.5  │ Branch Util         │ 0.1   │ 0.1   │ 0.1   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.6  │ VALU Active Threads │       │       │       │ Threads      │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.7  │ MFMA Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.8  │ MFMA Instr Cycles   │       │       │       │ Cycles/instr │
+   ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛
+
+Here we see that:
+
+- Both our IPC (**11.2.0**) and Issued IPC (**11.2.1**) are
+  :math:`\sim1.0` as expected, and
+
+- The SALU Utilization (**11.2.2**) was
+  nearly 100% as it was active for almost the entire kernel.
+
+VALU Active Threads
+-------------------
+
+For our final IPC/Utilization example, we consider a slight modification
+of our :ref:`v_mov <ipc-valu-utilization>` example:
+
+.. code-block:: cpp
+
+   template<int N=1000>
+   __global__ void vmov_with_divergence() {
+       if (threadIdx.x % 64 == 0)
+           vmov_op<N>();
+   }
+
+That is, we wrap our :ref:`VALU <desc-valu>` operation inside a conditional
+where only one lane in our wavefront is active. Running this kernel
+through Omniperf yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/ipc/mi200/ --dispatch 11 -b 11.2
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤══════════════════════════════════════════╤═════════╤═════════════╤═════════════╤══════════════╤════════╕
+   │    │ KernelName                               │   Count │     Sum(ns) │    Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪══════════════════════════════════════════╪═════════╪═════════════╪═════════════╪══════════════╪════════╡
+   │  0 │ void vmov_with_divergence<1000>() [clone │    1.00 │ 97125097.00 │ 97125097.00 │  97125097.00 │ 100.00 │
+   │    │  .kd]                                    │         │             │             │              │        │
+   ╘════╧══════════════════════════════════════════╧═════════╧═════════════╧═════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   11. Compute Units - Compute Pipeline
+   11.2 Pipeline Stats
+   ╒═════════╤═════════════════════╤═══════╤═══════╤═══════╤══════════════╕
+   │ Index   │ Metric              │ Avg   │ Min   │ Max   │ Unit         │
+   ╞═════════╪═════════════════════╪═══════╪═══════╪═══════╪══════════════╡
+   │ 11.2.0  │ IPC                 │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.1  │ IPC (Issued)        │ 1.0   │ 1.0   │ 1.0   │ Instr/cycle  │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.2  │ SALU Util           │ 0.1   │ 0.1   │ 0.1   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.3  │ VALU Util           │ 99.98 │ 99.98 │ 99.98 │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.4  │ VMEM Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.5  │ Branch Util         │ 0.2   │ 0.2   │ 0.2   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.6  │ VALU Active Threads │ 1.13  │ 1.13  │ 1.13  │ Threads      │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.7  │ MFMA Util           │ 0.0   │ 0.0   │ 0.0   │ Pct          │
+   ├─────────┼─────────────────────┼───────┼───────┼───────┼──────────────┤
+   │ 11.2.8  │ MFMA Instr Cycles   │       │       │       │ Cycles/instr │
+   ╘═════════╧═════════════════════╧═══════╧═══════╧═══════╧══════════════╛
+
+Here we see that once again, our VALU Utilization (**11.2.3**) is nearly
+100%. However, we note that the VALU Active Threads metric (**11.2.6**) is
+:math:`\sim 1`, which matches our conditional in the source code. So
+VALU Active Threads reports the average number of lanes of our wavefront
+that are active over all :ref:`VALU <desc-valu>` instructions, or thread
+“convergence” (i.e., 1 - :ref:`divergence <desc-divergence>`).
+
+.. note::
+
+   1. The act of evaluating a vector conditional in this example typically triggers VALU operations, contributing to why the VALU Active Threads metric is not identically one.
+   2. This metric is a time (cycle) averaged value, and thus contains an implicit dependence on the duration of various VALU instructions.
+
+   Nonetheless, this metric serves as a useful measure of thread-convergence.
+
+Finally, we note that our branch utilization (**11.2.5**) has increased
+slightly from our baseline, as we now have a branch (checking the value
+of ``threadIdx.x``).
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/lds-examples.rst b/projects/rocprofiler-compute/docs/tutorial/includes/lds-examples.rst
new file mode 100644
index 0000000000..f6cff7b722
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/includes/lds-examples.rst
@@ -0,0 +1,272 @@
+.. _lds-examples:
+
+LDS examples
+============
+
+For this example, consider the
+:dev-sample:`LDS sample <lds.hip>` distributed as a part of Omniperf. This
+code contains two kernels to explore how both :doc:`LDS </conceptual/local-data-share>` bandwidth and
+bank conflicts are calculated in Omniperf.
+
+This example was compiled and run on an MI250 accelerator using ROCm
+v5.6.0, and Omniperf v2.0.0.
+
+.. code-block:: shell-session
+
+   $ hipcc -O3 lds.hip -o lds
+
+Finally, we generate our ``omniperf profile`` as:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n lds --no-roof -- ./lds
+
+.. _lds-bandwidth:
+
+LDS bandwidth
+-------------
+
+To explore our *theoretical LDS bandwidth* metric, we use a simple
+kernel:
+
+.. code-block:: cpp
+
+   constexpr unsigned max_threads = 256;
+   __global__ void load(int* out, int flag) {
+     __shared__ int array[max_threads];
+     int index = threadIdx.x;
+     // fake a store to the LDS array to avoid unwanted behavior
+     if (flag)
+       array[max_threads - index] = index;
+     __syncthreads();
+     int x = array[index];
+     if (x == int(-1234567))
+       out[threadIdx.x] = x;
+   }
+
+Here we:
+
+* Create an array of 256 integers in :doc:`LDS </conceptual/local-data-share>`
+
+* Fake a write to the LDS using the ``flag`` variable (always set to zero on the
+  host) to avoid dead-code elimination
+
+* Read a single integer per work-item from index ``threadIdx.x`` of the LDS array
+
+* If the integer is equal to a magic number (always false), write the value out
+  to global memory to, again, avoid dead-code elimination
+
+Finally, we launch this kernel repeatedly, varying the number of threads
+in our workgroup:
+
+.. code-block:: cpp
+
+   void bandwidth_demo(int N) {
+     for (int i = 1; i <= N; ++i)
+       load<<<1,i>>>(nullptr, 0);
+     hipDeviceSynchronize();
+   }
+
+Next, let’s analyze the first of our bandwidth kernel dispatches:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/lds/mi200/ -b 12.2.1 --dispatch 0 -n per_kernel
+   <...>
+   12. Local Data Share (LDS)
+   12.2 LDS Stats
+   ╒═════════╤═══════════════════════╤════════╤════════╤════════╤══════════════════╕
+   │ Index   │ Metric                │    Avg │    Min │    Max │ Unit             │
+   ╞═════════╪═══════════════════════╪════════╪════════╪════════╪══════════════════╡
+   │ 12.2.1  │ Theoretical Bandwidth │ 256.00 │ 256.00 │ 256.00 │ Bytes per kernel │
+   ╘═════════╧═══════════════════════╧════════╧════════╧════════╧══════════════════╛
+
+Here we see that our Theoretical Bandwidth metric (**12.2.1**) is reporting
+256 Bytes were loaded even though we launched a single work-item
+workgroup, and thus only loaded a single integer from LDS. Why is this?
+
+Recall our definition of this metric:
+
+   Indicates the maximum amount of bytes that could have been loaded
+   from/stored to/atomically updated in the LDS per
+   :ref:`normalization unit <normalization-units>`.
+
+Here we see that this instruction *could* have loaded up to 256 bytes of
+data (4 bytes for each work-item in the wavefront), and therefore this
+is the expected value for this metric in Omniperf, hence why this metric
+is named the “theoretical” bandwidth.
+
+To further illustrate this point we plot the relationship of the
+theoretical bandwidth metric (**12.2.1**) as compared to the effective (or
+achieved) bandwidth of this kernel, varying the number of work-items
+launched from 1 to 256:
+
+.. figure:: ../data/profiling-by-example/ldsbandwidth.png
+   :align: center
+   :alt: Comparison of effective bandwidth versus the theoretical bandwidth
+         metric in Omniperf for our simple example.
+   :width: 800
+
+   Comparison of effective bandwidth versus the theoretical bandwidth
+   metric in Omniperf for our simple example.
+
+Here we see that the theoretical bandwidth metric follows a step-function. It
+increases only when another wavefront issues an LDS instruction for up to 256
+bytes of data. Such increases are marked in the plot using dashed lines. In
+contrast, the effective bandwidth increases linearly, by 4 bytes, with the
+number of work-items in the kernel, N.
+
+.. _lds-bank-conflicts:
+
+Bank conflicts
+--------------
+
+Next we explore bank conflicts using a slight modification of our bandwidth
+kernel:
+
+.. code-block:: cpp
+
+   constexpr unsigned nbanks = 32;
+   __global__ void conflicts(int* out, int flag) {
+     constexpr unsigned nelements = nbanks * max_threads;
+     __shared__ int array[nelements];
+     // each thread reads from the same bank
+     int index = threadIdx.x * nbanks;
+     // fake a store to the LDS array to avoid unwanted behavior
+     if (flag)
+       array[max_threads - index] = index;
+     __syncthreads();
+     int x = array[index];
+     if (x == int(-1234567))
+       out[threadIdx.x] = x;
+   }
+
+Here we:
+
+* Allocate an :doc:`LDS </conceptual/local-data-share>` array of size
+  :math:`32*256*4{B}=32{KiB}`
+
+* Fake a write to the LDS using the ``flag``
+  variable (always set to zero on the host) to avoid dead-code elimination
+
+* Read a single integer per work-item from index
+  ``threadIdx.x * nbanks`` of the LDS array
+
+* If the integer is equal to a
+  magic number (always false), write the value out to global memory to,
+  again, avoid dead-code elimination.
+
+On the host, we again repeatedly launch this kernel, varying the number
+of work-items:
+
+.. code-block:: cpp
+
+   void conflicts_demo(int N) {
+     for (int i = 1; i <= N; ++i)
+       conflicts<<<1,i>>>(nullptr, 0);
+     hipDeviceSynchronize();
+   }
+
+Analyzing our first ``conflicts`` kernel (i.e., a single work-item), we
+see:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/lds/mi200/ -b 12.2.4 12.2.6 --dispatch 256 -n per_kernel
+   <...>
+   --------------------------------------------------------------------------------
+   12. Local Data Share (LDS)
+   12.2 LDS Stats
+   ╒═════════╤════════════════╤═══════╤═══════╤═══════╤═══════════════════╕
+   │ Index   │ Metric         │   Avg │   Min │   Max │ Unit              │
+   ╞═════════╪════════════════╪═══════╪═══════╪═══════╪═══════════════════╡
+   │ 12.2.4  │ Index Accesses │  2.00 │  2.00 │  2.00 │ Cycles per kernel │
+   ├─────────┼────────────────┼───────┼───────┼───────┼───────────────────┤
+   │ 12.2.6  │ Bank Conflict  │  0.00 │  0.00 │  0.00 │ Cycles per kernel │
+   ╘═════════╧════════════════╧═══════╧═══════╧═══════╧═══════════════════╛
+
+In our :ref:`previous example <lds-bandwidth>`, we showed how a load
+from a single work-item is considered to have a theoretical bandwidth of
+256B. Recall, the :doc:`LDS </conceptual/local-data-share>` can load up to :math:`128B` per
+cycle (i.e., 32 banks x 4B / bank / cycle). Hence, we see that loading a 4B
+integer spends two cycles accessing the LDS
+(:math:`2\ {cycle} = (256B) / (128\ B/{cycle})`).
+
+Looking at the next ``conflicts`` dispatch (i.e., two work-items) yields:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/lds/mi200/ -b 12.2.4 12.2.6 --dispatch 257 -n per_kernel
+   <...>
+   --------------------------------------------------------------------------------
+   12. Local Data Share (LDS)
+   12.2 LDS Stats
+   ╒═════════╤════════════════╤═══════╤═══════╤═══════╤═══════════════════╕
+   │ Index   │ Metric         │   Avg │   Min │   Max │ Unit              │
+   ╞═════════╪════════════════╪═══════╪═══════╪═══════╪═══════════════════╡
+   │ 12.2.4  │ Index Accesses │  3.00 │  3.00 │  3.00 │ Cycles per kernel │
+   ├─────────┼────────────────┼───────┼───────┼───────┼───────────────────┤
+   │ 12.2.6  │ Bank Conflict  │  1.00 │  1.00 │  1.00 │ Cycles per kernel │
+   ╘═════════╧════════════════╧═══════╧═══════╧═══════╧═══════════════════╛
+
+Here we see a bank conflict! What happened?
+
+Recall that the index for each thread was calculated as:
+
+.. code-block:: cpp
+
+   int index = threadIdx.x * nbanks;
+
+Or, precisely 32 elements, and each element is 4B wide (for a standard
+integer). That is, each thread strides back to the same bank in the LDS,
+such that each work-item we add to the dispatch results in another bank
+conflict!
+
+Recalling our discussion of bank conflicts in our
+:doc:`LDS </conceptual/local-data-share>` description:
+
+A bank conflict occurs when two (or more) work-items in a wavefront
+want to read, write, or atomically update different addresses that
+map to the same bank in the same cycle. In this case, the conflict
+detection hardware will determine a new schedule such that the
+access is split into multiple cycles with no conflicts in any
+single cycle.
+
+Here we see the conflict resolution hardware in action! Because we have
+engineered our kernel to generate conflicts, we expect our bank conflict
+metric to scale linearly with the number of work-items:
+
+.. figure:: ../data/profiling-by-example/ldsconflicts.png
+   :align: center
+   :alt: Comparison of LDS conflict cycles versus access cycles for our simple
+         example.
+   :width: 800
+
+   Comparison of LDS conflict cycles versus access cycles for our simple
+   example.
+
+Here we show the comparison of the Index Accesses (**12.2.4**), to the Bank
+Conflicts (**12.2.6**) for the first 20 kernel invocations. We see that each grows
+linearly, and there is a constant gap of 2 cycles between them (i.e., the first
+access is never considered a conflict).
+
+Finally, we can use these two metrics to derive the Bank Conflict Rate (**12.1.4**).
+Since within an Index Access we have 32 banks that may need to be updated, we
+use:
+
+.. math::
+
+   \text{Bank Conflict Rate} = 100 \times \frac{\text{Bank Conflicts} / 32}{\text{Index Accesses} - \text{Bank Conflicts}}
+
+Plotting this, we see:
+
+.. figure:: ../data/profiling-by-example/ldsconflictrate.png
+   :align: center
+   :alt: LDS bank conflict rate example
+   :width: 800
+
+   LDS Bank Conflict rate for our simple example.
+
+The bank conflict rate linearly increases with the number of work-items
+within a wavefront that are active, *approaching* 100%, but never quite
+reaching it.
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/occupancy-limiters-example.rst b/projects/rocprofiler-compute/docs/tutorial/includes/occupancy-limiters-example.rst
new file mode 100644
index 0000000000..3242545338
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/includes/occupancy-limiters-example.rst
@@ -0,0 +1,456 @@
+.. _occupancy-example:
+
+Occupancy limiters example
+==========================
+
+For this example, consider the
+:dev-sample:`occupancy <occupancy.hip>` included with Omniperf. We will
+investigate the use of the resource allocation panel in the
+:ref:`Workgroup Manager <desc-spi>`’s metrics section to determine occupancy
+limiters. This code contains several kernels to explore both how various
+kernel resources impact achieved occupancy, and how this is reported in
+Omniperf.
+
+This example was compiled and run on an MI250 accelerator using ROCm
+v5.6.0, and Omniperf v2.0.0:
+
+.. code-block:: shell
+
+   $ hipcc -O3 occupancy.hip -o occupancy --save-temps
+
+We have again included the ``--save-temps`` flag to get the
+corresponding assembly.
+
+Finally, we generate our Omniperf profile as:
+
+.. code-block:: shell
+
+   $ omniperf profile -n occupancy --no-roof -- ./occupancy
+
+.. _occupancy-experiment-design:
+
+Design note
+-----------
+
+For our occupancy test, we need to create a kernel that is resource
+heavy, in various ways. For this purpose, we use the following (somewhat
+funny-looking) kernel:
+
+.. code-block:: cpp
+
+   constexpr int bound = 16;
+   __launch_bounds__(256)
+   __global__ void vgprbound(int N, double* ptr) {
+       double intermediates[bound];
+       for (int i = 0 ; i < bound; ++i) intermediates[i] = N * threadIdx.x;
+       double x = ptr[threadIdx.x];
+       for (int i = 0; i < 100; ++i) {
+           x += sin(pow(__shfl(x, i % warpSize) * intermediates[(i - 1) % bound], intermediates[i % bound]));
+           intermediates[i % bound] = x;
+       }
+       if (x == N) ptr[threadIdx.x] = x;
+   }
+
+Here we try to use as many :ref:`VGPRs <desc-valu>` as possible. To this end:
+
+* We create a small array of double precision floats, that we size to try
+  to fit into registers (i.e., ``bound``, this may need to be tuned
+  depending on the ROCm version).
+
+* We specify ``__launch_bounds__(256)``
+  to increase the number of VGPRs available to the kernel (by limiting the
+  number of wavefronts that can be resident on a
+  :doc:`CU </conceptual/compute-unit>`).
+
+* Write a unique non-compile time constant to each element of the array.
+
+* Repeatedly permute and call relatively expensive math functions on our
+  array elements.
+
+* Keep the compiler from optimizing out any operations by faking a write to the
+  ``ptr`` based on a run-time conditional.
+
+This yields a total of 122 VGPRs, but it is expected this number will
+depend on the exact ROCm/compiler version.
+
+.. code-block:: asm
+
+           .size   _Z9vgprboundiPd, .Lfunc_end1-_Z9vgprboundiPd
+                                           ; -- End function
+           .section        .AMDGPU.csdata
+   ; Kernel info:
+   ; codeLenInByte = 4732
+   ; NumSgprs: 68
+   ; NumVgprs: 122
+   ; NumAgprs: 0
+   ; <...>
+   ; AccumOffset: 124
+
+We will use various permutations of this kernel to limit occupancy, and
+more importantly for the purposes of this example, demonstrate how this
+is reported in Omniperf.
+
+.. _vgpr-occupancy:
+
+VGPR limited
+------------
+
+For our first test, we use the ``vgprbound`` kernel discussed in the
+:ref:`design note <occupancy-experiment-design>`. After profiling, we run
+the analyze step on this kernel:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 --dispatch 1
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═════════════════════════╤═════════╤══════════════╤══════════════╤══════════════╤════════╕
+   │    │ KernelName              │   Count │      Sum(ns) │     Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪═════════════════════════╪═════════╪══════════════╪══════════════╪══════════════╪════════╡
+   │  0 │ vgprbound(int, double*) │    1.00 │ 923093822.50 │ 923093822.50 │ 923093822.50 │ 100.00 │
+   ╘════╧═════════════════════════╧═════════╧══════════════╧══════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   2. System Speed-of-Light
+   2.1 Speed-of-Light
+   ╒═════════╤═════════════════════╤═════════╤════════════╤═════════╤═══════════════╕
+   │ Index   │ Metric              │     Avg │ Unit       │    Peak │   Pct of Peak │
+   ╞═════════╪═════════════════════╪═════════╪════════════╪═════════╪═══════════════╡
+   │ 2.1.15  │ Wavefront Occupancy │ 1661.24 │ Wavefronts │ 3328.00 │         49.92 │
+   ╘═════════╧═════════════════════╧═════════╧════════════╧═════════╧═══════════════╛
+
+
+   --------------------------------------------------------------------------------
+   6. Workgroup Manager (SPI)
+   6.2 Workgroup Manager - Resource Allocation
+   ╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                                 │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 6.2.0   │ Not-scheduled Rate (Workgroup Manager) │  0.64 │  0.64 │  0.64 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.1   │ Not-scheduled Rate (Scheduler-Pipe)    │ 24.94 │ 24.94 │ 24.94 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.2   │ Scheduler-Pipe Stall Rate              │ 24.49 │ 24.49 │ 24.49 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.3   │ Scratch Stall Rate                     │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.4   │ Insufficient SIMD Waveslots            │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.5   │ Insufficient SIMD VGPRs                │ 94.90 │ 94.90 │ 94.90 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.6   │ Insufficient SIMD SGPRs                │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.7   │ Insufficient CU LDS                    │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.8   │ Insufficient CU Barriers               │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.9   │ Reached CU Workgroup Limit             │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.10  │ Reached CU Wavefront Limit             │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   7. Wavefront
+   7.1 Wavefront Launch Stats
+   ╒═════════╤══════════╤════════╤════════╤════════╤═══════════╕
+   │ Index   │ Metric   │    Avg │    Min │    Max │ Unit      │
+   ╞═════════╪══════════╪════════╪════════╪════════╪═══════════╡
+   │ 7.1.5   │ VGPRs    │ 124.00 │ 124.00 │ 124.00 │ Registers │
+   ├─────────┼──────────┼────────┼────────┼────────┼───────────┤
+   │ 7.1.6   │ AGPRs    │   4.00 │   4.00 │   4.00 │ Registers │
+   ├─────────┼──────────┼────────┼────────┼────────┼───────────┤
+   │ 7.1.7   │ SGPRs    │  80.00 │  80.00 │  80.00 │ Registers │
+   ╘═════════╧══════════╧════════╧════════╧════════╧═══════════╛
+
+Here we see that the kernel indeed does use *around* (but not exactly)
+122 VGPRs, with the difference due to granularity of VGPR allocations.
+In addition, we see that we have allocated 4 “:ref:`AGPRs <desc-agprs>`”. We
+note that on current CDNA2 accelerators, the ``AccumOffset`` field of
+the assembly metadata:
+
+.. code-block:: asm
+
+   ; AccumOffset: 124
+
+denotes the divide between ``VGPRs`` and ``AGPRs``.
+
+Next, we examine our wavefront occupancy (**2.1.15**), and see that we are
+reaching only :math:`\sim50\%` of peak occupancy. As a result, we see
+that:
+
+- We are not scheduling workgroups :math:`\sim25\%` of
+  :ref:`total scheduler-pipe cycles <total-pipe-cycles>` (**6.2.1**); recall
+  from the discussion of the :ref:`workgroup manager <desc-spi>`, 25% is the maximum.
+
+- The scheduler-pipe is stalled (**6.2.2**) from scheduling workgroups due to
+  resource constraints for the same :math:`\sim25\%` of the time.
+
+- And finally, :math:`\sim95\%` of those stalls are due to a lack of SIMDs
+  with the appropriate number of VGPRs available (**6.2.5**).
+
+That is, the reason we can’t reach full occupancy is due to our VGPR
+usage, as expected!
+
+LDS limited
+-----------
+
+To examine an LDS limited example, we must change our kernel slightly:
+
+.. code-block:: cpp
+
+   constexpr size_t fully_allocate_lds = 64ul * 1024ul / sizeof(double);
+   __launch_bounds__(256)
+   __global__ void ldsbound(int N, double* ptr) {
+       __shared__ double intermediates[fully_allocate_lds];
+       for (int i = threadIdx.x ; i < fully_allocate_lds; i += blockDim.x) intermediates[i] = N * threadIdx.x;
+       __syncthreads();
+       double x = ptr[threadIdx.x];
+       for (int i = threadIdx.x; i < fully_allocate_lds; i += blockDim.x) {
+           x += sin(pow(__shfl(x, i % warpSize) * intermediates[(i - 1) % fully_allocate_lds], intermediates[i % fully_allocate_lds]));
+           __syncthreads();
+           intermediates[i % fully_allocate_lds] = x;
+       }
+       if (x == N) ptr[threadIdx.x] = x;
+   }
+
+Where we now:
+
+* Allocate a 64 KiB LDS array per workgroup, and
+
+* Use our allocated LDS array instead of a register array
+
+Analyzing this:
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 7.1.8 --dispatch 3
+   <...>
+   --------------------------------------------------------------------------------
+   2. System Speed-of-Light
+   2.1 Speed-of-Light
+   ╒═════════╤═════════════════════╤════════╤════════════╤═════════╤═══════════════╕
+   │ Index   │ Metric              │    Avg │ Unit       │    Peak │   Pct of Peak │
+   ╞═════════╪═════════════════════╪════════╪════════════╪═════════╪═══════════════╡
+   │ 2.1.15  │ Wavefront Occupancy │ 415.52 │ Wavefronts │ 3328.00 │         12.49 │
+   ╘═════════╧═════════════════════╧════════╧════════════╧═════════╧═══════════════╛
+
+
+   --------------------------------------------------------------------------------
+   6. Workgroup Manager (SPI)
+   6.2 Workgroup Manager - Resource Allocation
+   ╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                                 │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 6.2.0   │ Not-scheduled Rate (Workgroup Manager) │  0.13 │  0.13 │  0.13 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.1   │ Not-scheduled Rate (Scheduler-Pipe)    │ 24.87 │ 24.87 │ 24.87 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.2   │ Scheduler-Pipe Stall Rate              │ 24.84 │ 24.84 │ 24.84 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.3   │ Scratch Stall Rate                     │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.4   │ Insufficient SIMD Waveslots            │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.5   │ Insufficient SIMD VGPRs                │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.6   │ Insufficient SIMD SGPRs                │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.7   │ Insufficient CU LDS                    │ 96.47 │ 96.47 │ 96.47 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.8   │ Insufficient CU Barriers               │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.9   │ Reached CU Workgroup Limit             │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.10  │ Reached CU Wavefront Limit             │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   7. Wavefront
+   7.1 Wavefront Launch Stats
+   ╒═════════╤════════════════╤══════════╤══════════╤══════════╤═══════════╕
+   │ Index   │ Metric         │      Avg │      Min │      Max │ Unit      │
+   ╞═════════╪════════════════╪══════════╪══════════╪══════════╪═══════════╡
+   │ 7.1.5   │ VGPRs          │    96.00 │    96.00 │    96.00 │ Registers │
+   ├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤
+   │ 7.1.6   │ AGPRs          │     0.00 │     0.00 │     0.00 │ Registers │
+   ├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤
+   │ 7.1.7   │ SGPRs          │    80.00 │    80.00 │    80.00 │ Registers │
+   ├─────────┼────────────────┼──────────┼──────────┼──────────┼───────────┤
+   │ 7.1.8   │ LDS Allocation │ 65536.00 │ 65536.00 │ 65536.00 │ Bytes     │
+   ╘═════════╧════════════════╧══════════╧══════════╧══════════╧═══════════╛
+
+We see that our VGPR allocation has gone down to 96 registers, but now
+we see our 64KiB LDS allocation (**7.1.8**). In addition, we see a similar
+non-schedule rate (**6.2.1**) and stall rate (**6.2.2**) as in our
+:ref:`VGPR example <vgpr-occupancy>`. However, our occupancy limiter has now
+shifted from VGPRs (**6.2.5**) to LDS (**6.2.7**).
+
+We note that although we see around the same scheduler/stall rates
+(with our LDS limiter), our wave occupancy (**2.1.15**) is significantly
+lower (:math:`\sim12\%`)! This is important to remember: the occupancy
+limiter metrics in the resource allocation section tell you what the
+limiter was, but *not* how much the occupancy was limited. These metrics
+should always be analyzed in concert with the wavefront occupancy
+metric!
+
+.. _sgpr-occupancy:
+
+SGPR limited
+------------
+
+Finally, we modify our kernel once more to make it limited by
+:ref:`SGPRs <desc-salu>`:
+
+.. code-block:: cpp
+
+   constexpr int sgprlim = 1;
+   __launch_bounds__(1024, 8)
+   __global__ void sgprbound(int N, double* ptr) {
+       double intermediates[sgprlim];
+       for (int i = 0 ; i < sgprlim; ++i) intermediates[i] = i;
+       double x = ptr[0];
+       #pragma unroll 1
+       for (int i = 0; i < 100; ++i) {
+           x += sin(pow(intermediates[(i - 1) % sgprlim], intermediates[i % sgprlim]));
+           intermediates[i % sgprlim] = x;
+       }
+       if (x == N) ptr[0] = x;
+   }
+
+The major changes here are to:
+
+* Make as much as possible provably uniform across the wave (notice the lack of
+  ``threadIdx.x`` in the ``intermediates`` initialization and elsewhere).
+* Add ``__launch_bounds__(1024, 8)``, which reduces our maximum VGPRs to 64 (such that 8 waves can
+  fit per SIMD), but causes some register spills (i.e., :ref:`scratch <memory-spaces>` usage).
+* Lower the ``bound`` (here we use ``sgprlim``) of the array to reduce VGPR/Scratch usage.
+
+This results in the following assembly metadata for this kernel:
+
+.. code-block:: asm
+
+           .size   _Z9sgprboundiPd, .Lfunc_end3-_Z9sgprboundiPd
+                                           ; -- End function
+           .section        .AMDGPU.csdata
+   ; Kernel info:
+   ; codeLenInByte = 4872
+   ; NumSgprs: 76
+   ; NumVgprs: 64
+   ; NumAgprs: 0
+   ; TotalNumVgprs: 64
+   ; ScratchSize: 60
+   ; <...>
+   ; AccumOffset: 64
+   ; Occupancy: 8
+
+Analyzing this workload yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/occupancy/mi200/ -b 2.1.15 6.2 7.1.5 7.1.6 7.1.7 7.1.8 7.1.9 --dispatch 5
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═════════════════════════╤═════════╤══════════════╤══════════════╤══════════════╤════════╕
+   │    │ KernelName              │   Count │      Sum(ns) │     Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪═════════════════════════╪═════════╪══════════════╪══════════════╪══════════════╪════════╡
+   │  0 │ sgprbound(int, double*) │    1.00 │ 782069812.00 │ 782069812.00 │ 782069812.00 │ 100.00 │
+   ╘════╧═════════════════════════╧═════════╧══════════════╧══════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   2. System Speed-of-Light
+   2.1 Speed-of-Light
+   ╒═════════╤═════════════════════╤═════════╤════════════╤═════════╤═══════════════╕
+   │ Index   │ Metric              │     Avg │ Unit       │    Peak │   Pct of Peak │
+   ╞═════════╪═════════════════════╪═════════╪════════════╪═════════╪═══════════════╡
+   │ 2.1.15  │ Wavefront Occupancy │ 3291.76 │ Wavefronts │ 3328.00 │         98.91 │
+   ╘═════════╧═════════════════════╧═════════╧════════════╧═════════╧═══════════════╛
+
+
+   --------------------------------------------------------------------------------
+   6. Workgroup Manager (SPI)
+   6.2 Workgroup Manager - Resource Allocation
+   ╒═════════╤════════════════════════════════════════╤═══════╤═══════╤═══════╤════════╕
+   │ Index   │ Metric                                 │   Avg │   Min │   Max │ Unit   │
+   ╞═════════╪════════════════════════════════════════╪═══════╪═══════╪═══════╪════════╡
+   │ 6.2.0   │ Not-scheduled Rate (Workgroup Manager) │  7.72 │  7.72 │  7.72 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.1   │ Not-scheduled Rate (Scheduler-Pipe)    │ 15.17 │ 15.17 │ 15.17 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.2   │ Scheduler-Pipe Stall Rate              │  7.38 │  7.38 │  7.38 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.3   │ Scratch Stall Rate                     │ 39.76 │ 39.76 │ 39.76 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.4   │ Insufficient SIMD Waveslots            │ 26.32 │ 26.32 │ 26.32 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.5   │ Insufficient SIMD VGPRs                │ 26.32 │ 26.32 │ 26.32 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.6   │ Insufficient SIMD SGPRs                │ 25.52 │ 25.52 │ 25.52 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.7   │ Insufficient CU LDS                    │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.8   │ Insufficient CU Barriers               │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.9   │ Reached CU Workgroup Limit             │  0.00 │  0.00 │  0.00 │ Pct    │
+   ├─────────┼────────────────────────────────────────┼───────┼───────┼───────┼────────┤
+   │ 6.2.10  │ Reached CU Wavefront Limit             │  0.00 │  0.00 │  0.00 │ Pct    │
+   ╘═════════╧════════════════════════════════════════╧═══════╧═══════╧═══════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   7. Wavefront
+   7.1 Wavefront Launch Stats
+   ╒═════════╤════════════════════╤═══════╤═══════╤═══════╤════════════════╕
+   │ Index   │ Metric             │   Avg │   Min │   Max │ Unit           │
+   ╞═════════╪════════════════════╪═══════╪═══════╪═══════╪════════════════╡
+   │ 7.1.5   │ VGPRs              │ 64.00 │ 64.00 │ 64.00 │ Registers      │
+   ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤
+   │ 7.1.6   │ AGPRs              │  0.00 │  0.00 │  0.00 │ Registers      │
+   ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤
+   │ 7.1.7   │ SGPRs              │ 80.00 │ 80.00 │ 80.00 │ Registers      │
+   ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤
+   │ 7.1.8   │ LDS Allocation     │  0.00 │  0.00 │  0.00 │ Bytes          │
+   ├─────────┼────────────────────┼───────┼───────┼───────┼────────────────┤
+   │ 7.1.9   │ Scratch Allocation │ 60.00 │ 60.00 │ 60.00 │ Bytes/workitem │
+   ╘═════════╧════════════════════╧═══════╧═══════╧═══════╧════════════════╛
+
+Here we see that our wavefront launch stats (**7.1**) have changed to
+reflect the metadata seen in the ``--save-temps`` output. Of particular
+interest, we see:
+
+* The SGPR allocation (**7.1.7**) is 80 registers, slightly more than the 76
+  requested by the compiler due to allocation granularity, and
+
+* We have a :ref:`"scratch" <memory-spaces>`, that is, private memory,
+  allocation of 60 bytes per work-item.
+
+Analyzing the resource allocation block (**6.2**) we now see that for the
+first time, the "Not-scheduled Rate (Workgroup Manager)" metric (**6.2.0**)
+has become non-zero. This is because the workgroup manager is
+responsible for management of scratch, which we see also contributes to
+our occupancy limiters in the "Scratch Stall Rate" (**6.2.3**). Note that
+the sum of the workgroup manager not-scheduled rate and the
+scheduler-pipe not-scheduled rate is still :math:`\sim25\%`, as in our
+previous examples.
+
+Next, we see that the scheduler-pipe stall rate (**6.2.2**), that is, how often
+we could not schedule a workgroup to a CU, was only
+:math:`\sim7\%`. This hints that perhaps our kernel is not
+*particularly* occupancy limited by resources. Indeed, checking the
+wave occupancy metric (**2.1.15**) shows that this kernel is reaching nearly
+99% occupancy.
+
+Finally, we inspect the occupancy limiter metrics and see a roughly even
+split between :ref:`waveslots <desc-valu>` (**6.2.4**), :ref:`VGPRs <desc-valu>`
+(**6.2.5**), and :ref:`SGPRs <desc-salu>` (**6.2.6**) along with the scratch stalls
+(**6.2.3**) previously mentioned.
+
+This is yet another reminder to view occupancy holistically. While these
+metrics tell you why a workgroup cannot be scheduled, they do *not* tell
+you what your occupancy was (consult wavefront occupancy) *nor* whether
+increasing occupancy will be beneficial to performance.
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/valu-arithmetic-instruction-mix.rst b/projects/rocprofiler-compute/docs/tutorial/includes/valu-arithmetic-instruction-mix.rst
new file mode 100644
index 0000000000..785fc6ecf9
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/includes/valu-arithmetic-instruction-mix.rst
@@ -0,0 +1,113 @@
+.. _valu-arith-instruction-mix-ex:
+
+VALU arithmetic instruction mix
+===============================
+
+For this example, consider the
+:dev-sample:`instruction mix sample <instmix.hip>` distributed as a part
+of Omniperf.
+
+.. note::
+
+   The examples in this section are expected to work on all CDNA™ accelerators.
+   However, the actual experiment results in this section were collected on an
+   :ref:`MI2XX <mixxx-note>` accelerator.
+
+.. _valu-experiment-design:
+
+Design note
+-----------
+
+This code uses a number of inline assembly instructions to cleanly
+identify the types of instructions being issued, as well as to avoid
+optimization / dead-code elimination by the compiler. While inline
+assembly is inherently not portable, this example is expected to work on
+all GCN™ GPUs and CDNA accelerators.
+
+We reproduce a sample of the kernel as follows:
+
+.. code-block:: cpp
+
+   // fp32: add, mul, transcendental and fma
+   float f1, f2;
+   asm volatile(
+       "v_add_f32_e32 %0, %1, %0\n"
+       "v_mul_f32_e32 %0, %1, %0\n"
+       "v_sqrt_f32 %0, %1\n"
+       "v_fma_f32 %0, %1, %0, %1\n"
+       : "=v"(f1)
+       : "v"(f2));
+
+These instructions correspond to:
+
+* A 32-bit floating point addition,
+
+* a 32-bit floating point multiplication,
+
+* a 32-bit floating point square-root transcendental operation, and
+
+* a 32-bit floating point fused multiply-add operation.
+
+For more detail, refer to the `CDNA2 ISA
+Guide <https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf>`__.
+
+Instruction mix
+^^^^^^^^^^^^^^^
+
+This example was compiled and run on an MI250 accelerator using ROCm
+v5.6.0, and Omniperf v2.0.0.
+
+.. code-block:: shell
+
+   $ hipcc -O3 instmix.hip -o instmix
+
+Generate the profile for this example using the following command.
+
+.. code-block:: shell
+
+   $ omniperf profile -n instmix --no-roof -- ./instmix
+
+Analyze the instruction mix section.
+
+.. code-block:: shell
+
+   $ omniperf analyze -p workloads/instmix/mi200/ -b 10.2
+   <...>
+   10. Compute Units - Instruction Mix
+   10.2 VALU Arithmetic Instr Mix
+   ╒═════════╤════════════╤═════════╤════════════════╕
+   │ Index   │ Metric     │   Count │ Unit           │
+   ╞═════════╪════════════╪═════════╪════════════════╡
+   │ 10.2.0  │ INT32      │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.1  │ INT64      │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.2  │ F16-ADD    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.3  │ F16-MUL    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.4  │ F16-FMA    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.5  │ F16-Trans  │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.6  │ F32-ADD    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.7  │ F32-MUL    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.8  │ F32-FMA    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.9  │ F32-Trans  │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.10 │ F64-ADD    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.11 │ F64-MUL    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.12 │ F64-FMA    │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.13 │ F64-Trans  │    1.00 │ Instr per wave │
+   ├─────────┼────────────┼─────────┼────────────────┤
+   │ 10.2.14 │ Conversion │    1.00 │ Instr per wave │
+   ╘═════════╧════════════╧═════════╧════════════════╛
+
+This shows that we have exactly one of each type of VALU arithmetic instruction
+by construction.
diff --git a/projects/rocprofiler-compute/docs/tutorial/includes/vector-memory-operation-counting.rst b/projects/rocprofiler-compute/docs/tutorial/includes/vector-memory-operation-counting.rst
new file mode 100644
index 0000000000..e3dd0deb4a
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/includes/vector-memory-operation-counting.rst
@@ -0,0 +1,698 @@
+.. _vmem-example:
+
+Vector memory operation counting
+================================
+
+.. _flat-memory-ex:
+
+Global / Generic (FLAT)
+-----------------------
+
+For this example, consider the
+:dev-sample:`vector memory sample <vmem.hip>` distributed as a part of
+Omniperf. This code launches many different versions of simple
+read/write/atomic-only kernels targeting various address spaces. For example,
+below is our simple ``global_write`` kernel:
+
+.. code-block:: cpp
+
+   // write to a global pointer
+   __global__ void global_write(int* ptr, int zero) {
+     ptr[threadIdx.x] = zero;
+   }
+
+.. note::
+
+   This example was compiled and run on an MI250 accelerator using ROCm
+   v5.6.0, and Omniperf v2.0.0.
+
+.. code-block:: shell-session
+
+   $ hipcc -O3 --save-temps vmem.hip -o vmem
+
+We have also chosen to include the ``--save-temps`` flag to save the
+compiler temporary files, such as the generated CDNA assembly code, for
+inspection.
+
+Finally, we generate our ``omniperf profile`` as follows.
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n vmem --no-roof -- ./vmem
+
+.. _flat-experiment-design:
+
+Design note
+^^^^^^^^^^^
+
+This section explains some of the more peculiar lines of code in the
+example, such as the use of compiler built-ins and explicit address space
+casting.
+
+.. code-block:: cpp
+
+   // write to a generic pointer
+   typedef int __attribute__((address_space(0)))* generic_ptr;
+
+   __attribute__((noinline)) __device__ void generic_store(generic_ptr ptr, int zero) { *ptr = zero; }
+
+   __global__ void generic_write(int* ptr, int zero, int filter) {
+     __shared__ int lds[1024];
+     int* generic = (threadIdx.x < filter) ? &ptr[threadIdx.x] : &lds[threadIdx.x];
+     generic_store((generic_ptr)generic, zero);
+   }
+
+One of the aims of this example is to demonstrate the use of the
+:llvm-docs:`"generic" FLAT <address-space-identifier>` address space. This
+address space is typically used when the compiler cannot statically prove where
+the backing memory is located.
+
+To try to *force* the compiler to use this address space, we applied
+``__attribute__((noinline))`` to the ``generic_store`` function to have the
+compiler treat it as a function call (that is, on the other side of which, the
+address space may not be known). However, in a trivial example such as this, the
+compiler may choose to specialize the ``generic_store`` function to the two
+address spaces that might provably be used from our translation unit, that is,
+:ref:`"local" (or, LDS) <memory-spaces>` and :ref:`"global" <memory-spaces>`.
+Hence, we forcibly cast the address space to
+:ref:`"generic" (or, FLAT) <memory-spaces>` to avoid this compiler
+optimization.
+
+.. warning::
+
+   While convenient for this example, this sort of explicit address space
+   casting can lead to strange compilation errors, and in the worst case,
+   incorrect results. As a result, its use is discouraged in production code.
+
+For more details on address spaces, refer to
+:ref:`memory-spaces`.
+
+Global write
+^^^^^^^^^^^^
+
+First, we demonstrate our simple ``global_write`` kernel:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 1 -b 10.3 15.1.4 15.1.5 15.1.6 15.1.7 15.1.8 15.1.9 15.1.10 15.1.11  -n per_kernel
+   <...>
+   --------------------------------------------------------------------------------
+   0. Top Stat
+   ╒════╤═════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+   │    │ KernelName                          │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪═════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+   │  0 │ global_write(int*, int) [clone .kd] │    1.00 │   2400.00 │    2400.00 │      2400.00 │ 100.00 │
+   ╘════╧═════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   15. Address Processing Unit and Data Return Path (TA/TD)
+   15.1 Address Processing Unit
+   ╒═════════╤═════════════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                      │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═════════════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 15.1.4  │ Total Instructions          │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.5  │ Global/Generic Instr        │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.6  │ Global/Generic Read Instr   │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.7  │ Global/Generic Write Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.8  │ Global/Generic Atomic Instr │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.9  │ Spill/Stack Instr           │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.10 │ Spill/Stack Read Instr      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼─────────────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 15.1.11 │ Spill/Stack Write Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═════════════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+Here, we have presented both the information in the VMEM Instruction Mix
+table (**10.3**) and the Address Processing Unit (**15.1**). We note that this
+data is expected to be identical, and hence we omit table 15.1 in our
+subsequent examples.
+
+In addition, as expected, we see a single Global/Generic Write
+instruction (**10.3.2**, **15.1.7**). Inspecting the generated assembly, we get:
+
+.. code-block:: asm
+
+           .protected      _Z12global_writePii     ; -- Begin function _Z12global_writePii
+           .globl  _Z12global_writePii
+           .p2align        8
+           .type   _Z12global_writePii,@function
+   _Z12global_writePii:                    ; @_Z12global_writePii
+   ; %bb.0:
+           s_load_dword s2, s[4:5], 0x8
+           s_load_dwordx2 s[0:1], s[4:5], 0x0
+           v_lshlrev_b32_e32 v0, 2, v0
+           s_waitcnt lgkmcnt(0)
+           v_mov_b32_e32 v1, s2
+           global_store_dword v0, v1, s[0:1]
+           s_endpgm
+           .section        .rodata,#alloc
+           .p2align        6, 0x0
+           .amdhsa_kernel _Z12global_writePii
+
+Notice that this corresponds to an instance of a ``global_store_dword``
+operation.
+
+.. note::
+
+   The assembly in these experiments was generated for an
+   :ref:`MI2XX <mixxx-note>` accelerator using ROCm 5.6.0, and may change
+   depending on ROCm versions and the targeted hardware architecture.
+
+.. _generic-write-ex:
+
+Generic write to LDS
+^^^^^^^^^^^^^^^^^^^^
+
+Next, we examine a generic write. As discussed
+:ref:`previously <flat-experiment-design>`, our ``generic_write`` kernel uses an
+address space cast to *force* the compiler to choose our desired address
+space, regardless of other optimizations that may be possible.
+
+Also note that the ``filter`` parameter passed in as a kernel argument (see
+:dev-sample:`example <vmem.hip>` and
+:ref:`design note <flat-experiment-design>`) is set to zero on the host, such
+that we always write to the :doc:`local </conceptual/local-data-share>` (LDS)
+memory allocation ``lds``.
+
+Examining this kernel in the VMEM Instruction Mix table yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 2 -b 10.3 -n per_kernel
+   <...>
+   0. Top Stat
+   ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+   │    │ KernelName                               │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+   │  0 │ generic_write(int*, int, int) [clone .kd │    1.00 │   2880.00 │    2880.00 │      2880.00 │ 100.00 │
+   │    │ ]                                        │         │           │            │              │        │
+   ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+As expected, we see a single generic write (**10.3.2**). In the assembly
+generated for this kernel (in particular, we care about the
+``generic_store`` function), we see that this corresponds to a
+``flat_store_dword`` instruction:
+
+.. code-block:: asm
+
+           .type   _Z13generic_storePii,@function
+   _Z13generic_storePii:                   ; @_Z13generic_storePii
+   ; %bb.0:
+           s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+           flat_store_dword v[0:1], v2
+           s_waitcnt vmcnt(0) lgkmcnt(0)
+           s_setpc_b64 s[30:31]
+   .Lfunc_end0:
+
+In addition, we note that we can observe the destination of this request
+by looking at the LDS Instructions metric (**12.2.0**) -- which indicates one LDS
+access.
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 2 -b 12.2.0 -n per_kernel
+   <...>
+   12. Local Data Share (LDS)
+   12.2 LDS Stats
+   ╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric     │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 12.2.0  │ LDS Instrs │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+.. note::
+
+   Exercise for the reader: if this access had been targeted at global memory
+   (for instance, by changing the value of ``filter``), where should we look for the
+   memory traffic?  Hint: see the :ref:`generic read <generic-read-ex>` example.
+
+.. _global-read-ex:
+
+Global read
+^^^^^^^^^^^
+
+Next, we examine a simple global read operation:
+
+.. code-block:: cpp
+
+   __global__ void global_read(int* ptr, int zero) {
+     int x = ptr[threadIdx.x];
+     if (x != zero) {
+       ptr[threadIdx.x] = x + 1;
+     }
+   }
+
+Here we observe a now familiar pattern:
+
+- Read a value in from global memory.
+
+- Have a write hidden behind a conditional that is impossible for
+  the compiler to statically eliminate, but is identically false. In this
+  case, our ``main()`` function initializes the data in ``ptr`` to zero.
+
+Running Omniperf on this kernel yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 3 -b 10.3 -n per_kernel
+   <...>
+   0. Top Stat
+   ╒════╤════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+   │    │ KernelName                         │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+   │  0 │ global_read(int*, int) [clone .kd] │    1.00 │   4480.00 │    4480.00 │      4480.00 │ 100.00 │
+   ╘════╧════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+Here we see a single global/generic instruction (**10.3.0**) which, as
+expected, is a read (**10.3.1**).
+
+.. _generic-read-ex:
+
+Generic read from global memory
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For our generic read example, we choose to change our target for the
+generic read to be global memory:
+
+.. code-block:: cpp
+
+   __global__ void generic_read(int* ptr, int zero, int filter) {
+     __shared__ int lds[1024];
+     if (static_cast<int>(filter - 1) == zero) {
+       lds[threadIdx.x] = 0; // initialize to zero to avoid conditional, but hide behind _another_ conditional
+     }
+     int* generic;
+     if (static_cast<int>(threadIdx.x) > filter - 1) {
+       generic = &ptr[threadIdx.x];
+     } else {
+       generic = &lds[threadIdx.x];
+       abort();
+     }
+     int x = generic_load((generic_ptr)generic);
+     if (x != zero) {
+       ptr[threadIdx.x] = x + 1;
+     }
+   }
+
+In addition to our usual ``if (condition_that_wont_happen)`` guard
+around the write operation, there is an additional conditional around
+the initialization of the ``lds`` buffer. We note that it’s typically
+required to write to this buffer to prevent the compiler from
+eliminating the local memory branch entirely due to undefined behavior
+(use of an uninitialized value). However, to report *only* our global
+memory read, we again hide this initialization behind an identically
+false conditional (both ``zero`` and ``filter`` are set to zero in the
+kernel launch). Note that this is a *different* conditional from our
+pointer assignment (to avoid combination of the two).
+
+Running Omniperf on this kernel reports:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 4 -b 10.3 12.2.0 16.3.10 -n per_kernel
+   <...>
+   0. Top Stat
+   ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+   │    │ KernelName                               │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+   │  0 │ generic_read(int*, int, int) [clone .kd] │    1.00 │   2240.00 │    2240.00 │      2240.00 │ 100.00 │
+   ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   12. Local Data Share (LDS)
+   12.2 LDS Stats
+   ╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric     │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 12.2.0  │ LDS Instrs │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   16. Vector L1 Data Cache
+   16.3 L1D Cache Accesses
+   ╒═════════╤════════════╤═══════╤═══════╤═══════╤════════════════╕
+   │ Index   │ Metric     │   Avg │   Min │   Max │ Unit           │
+   ╞═════════╪════════════╪═══════╪═══════╪═══════╪════════════════╡
+   │ 16.3.10 │ L1-L2 Read │  1.00 │  1.00 │  1.00 │ Req per kernel │
+   ╘═════════╧════════════╧═══════╧═══════╧═══════╧════════════════╛
+
+Here we observe:
+
+- A single global/generic read operation (**10.3.1**), which
+
+- Is not an LDS instruction (**12.2**), as seen in the
+  :ref:`generic write <generic-write-ex>` example, but is instead
+
+- An L1-L2 read operation (**16.3.10**)
+
+That is, we have successfully targeted our generic read at global
+memory. Inspecting the assembly shows this corresponds to a
+``flat_load_dword`` instruction.
+
+.. _global-atomic-ex:
+
+Global atomic
+^^^^^^^^^^^^^
+
+Our global atomic kernel simply atomically adds a (non-compile-time) zero value
+to a pointer.
+
+.. code-block:: cpp
+
+   __global__ void global_atomic(int* ptr, int zero) {
+     atomicAdd(ptr, zero);
+   }
+
+
+Running Omniperf on this kernel yields:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 5 -b 10.3 16.3.12 -n per_kernel
+   <...>
+   0. Top Stat
+   ╒════╤══════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+   │    │ KernelName                           │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪══════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+   │  0 │ global_atomic(int*, int) [clone .kd] │    1.00 │   4640.00 │    4640.00 │      4640.00 │ 100.00 │
+   ╘════╧══════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+   --------------------------------------------------------------------------------
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   16. Vector L1 Data Cache
+   16.3 L1D Cache Accesses
+   ╒═════════╤══════════════╤═══════╤═══════╤═══════╤════════════════╕
+   │ Index   │ Metric       │   Avg │   Min │   Max │ Unit           │
+   ╞═════════╪══════════════╪═══════╪═══════╪═══════╪════════════════╡
+   │ 16.3.12 │ L1-L2 Atomic │  1.00 │  1.00 │  1.00 │ Req per kernel │
+   ╘═════════╧══════════════╧═══════╧═══════╧═══════╧════════════════╛
+
+Here we see a single global/generic atomic instruction (**10.3.3**), which
+corresponds to an L1-L2 atomic request (**16.3.12**).
+
+.. _generic-mixed-atomic-ex:
+
+Generic, mixed atomic
+^^^^^^^^^^^^^^^^^^^^^
+
+In our final global/generic example, we look at a case where our generic
+operation targets both LDS and global memory:
+
+.. code-block:: cpp
+
+   __global__ void generic_atomic(int* ptr, int filter, int zero) {
+     __shared__ int lds[1024];
+     int* generic = (threadIdx.x % 2 == filter) ? &ptr[threadIdx.x] : &lds[threadIdx.x];
+     generic_atomic((generic_ptr)generic, zero);
+   }
+
+This assigns every other work-item to atomically update global memory or
+local memory.
+
+Running this kernel through Omniperf shows:
+
+.. code-block:: shell-session
+
+   $ omniperf analyze -p workloads/vmem/mi200/ --dispatch 6 -b 10.3 12.2.0 16.3.12 -n per_kernel
+   <...>
+   0. Top Stat
+   ╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
+   │    │ KernelName                               │   Count │   Sum(ns) │   Mean(ns) │   Median(ns) │    Pct │
+   ╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
+   │  0 │ generic_atomic(int*, int, int) [clone .k │    1.00 │   3360.00 │    3360.00 │      3360.00 │ 100.00 │
+   │    │ d]                                       │         │           │            │              │        │
+   ╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
+
+
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   12. Local Data Share (LDS)
+   12.2 LDS Stats
+   ╒═════════╤════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric     │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 12.2.0  │ LDS Instrs │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ╘═════════╧════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   16. Vector L1 Data Cache
+   16.3 L1D Cache Accesses
+   ╒═════════╤══════════════╤═══════╤═══════╤═══════╤════════════════╕
+   │ Index   │ Metric       │   Avg │   Min │   Max │ Unit           │
+   ╞═════════╪══════════════╪═══════╪═══════╪═══════╪════════════════╡
+   │ 16.3.12 │ L1-L2 Atomic │  1.00 │  1.00 │  1.00 │ Req per kernel │
+   ╘═════════╧══════════════╧═══════╧═══════╧═══════╧════════════════╛
+
+That is, we see:
+
+- A single generic atomic instruction (**10.3.3**) that maps to both
+
+- An LDS instruction (**12.2.0**), and
+
+- An L1-L2 atomic request (**16.3.12**)
+
+We have demonstrated the ability of the generic address space to
+*dynamically* target different backing memory.
+
+.. _spill-scratch:
+
+Spill/Scratch (BUFFER)
+----------------------
+
+Next we examine the use of "Spill/Scratch" memory. On current CDNA
+accelerators such as the :ref:`MI2XX <mixxx-note>`, this is implemented using
+the :ref:`private <memory-spaces>` memory space, which maps to
+:llvm-docs:`"scratch" memory <amdgpu-address-spaces>` in AMDGPU hardware
+terminology. This type of memory can be accessed via different instructions
+depending on the specific architecture targeted. However, current CDNA
+accelerators such as the :ref:`MI2XX <mixxx-note>` use so-called ``buffer``
+instructions to access private memory in a simple (and typically coalesced)
+manner. See
+:mi200-isa-pdf:`Sec. 9.1, "Vector Memory Buffer Instructions" of the CDNA2 ISA guide <>`
+for further reading on this instruction type.
+
+We develop a `simple
+kernel <https://github.com/ROCm/omniperf/blob/dev/sample/stack.hip>`__
+that uses stack memory:
+
+.. code-block:: cpp
+
+   #include <hip/hip_runtime.h>
+   __global__ void knl(int* out, int filter) {
+     int x[1024];
+     x[filter] = 0;
+     if (threadIdx.x < filter)
+       out[threadIdx.x] = x[threadIdx.x];
+   }
+
+Our strategy here is to:
+
+* Create a large stack buffer (that cannot reasonably fit into registers)
+
+* Write to a compile-time unknown location on the stack, and then
+
+* Behind the typical compile-time unknown ``if(condition_that_wont_happen)``
+
+* Read from a different, compile-time unknown, location on the stack and write
+  to global memory to prevent the compiler from optimizing it out.
+
+This example was compiled and run on an MI250 accelerator using ROCm v5.6.0, and
+Omniperf v2.0.0.
+
+.. code-block:: shell-session
+
+   $ hipcc -O3 stack.hip -o stack
+
+And profiled using Omniperf:
+
+.. code-block:: shell-session
+
+   $ omniperf profile -n stack --no-roof -- ./stack
+   <...>
+   $ omniperf analyze -p workloads/stack/mi200/  -b 10.3 16.3.11 -n per_kernel
+   <...>
+   10. Compute Units - Instruction Mix
+   10.3 VMEM Instr Mix
+   ╒═════════╤═══════════════════════╤═══════╤═══════╤═══════╤══════════════════╕
+   │ Index   │ Metric                │   Avg │   Min │   Max │ Unit             │
+   ╞═════════╪═══════════════════════╪═══════╪═══════╪═══════╪══════════════════╡
+   │ 10.3.0  │ Global/Generic Instr  │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.1  │ Global/Generic Read   │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.2  │ Global/Generic Write  │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.3  │ Global/Generic Atomic │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.4  │ Spill/Stack Instr     │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.5  │ Spill/Stack Read      │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.6  │ Spill/Stack Write     │  1.00 │  1.00 │  1.00 │ Instr per kernel │
+   ├─────────┼───────────────────────┼───────┼───────┼───────┼──────────────────┤
+   │ 10.3.7  │ Spill/Stack Atomic    │  0.00 │  0.00 │  0.00 │ Instr per kernel │
+   ╘═════════╧═══════════════════════╧═══════╧═══════╧═══════╧══════════════════╛
+
+
+   --------------------------------------------------------------------------------
+   16. Vector L1 Data Cache
+   16.3 L1D Cache Accesses
+   ╒═════════╤═════════════╤═══════╤═══════╤═══════╤════════════════╕
+   │ Index   │ Metric      │   Avg │   Min │   Max │ Unit           │
+   ╞═════════╪═════════════╪═══════╪═══════╪═══════╪════════════════╡
+   │ 16.3.11 │ L1-L2 Write │  1.00 │  1.00 │  1.00 │ Req per kernel │
+   ╘═════════╧═════════════╧═══════╧═══════╧═══════╧════════════════╛
+
+Here we see a single write to the stack (**10.3.6**), which corresponds to
+an L1-L2 write request (**16.3.11**). That is, the stack is backed by global
+memory and travels through the same memory hierarchy.
diff --git a/projects/rocprofiler-compute/docs/tutorial/learning-resources.rst b/projects/rocprofiler-compute/docs/tutorial/learning-resources.rst
new file mode 100644
index 0000000000..931f1f7f1d
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/learning-resources.rst
@@ -0,0 +1,22 @@
+.. meta::
+   :description: Omniperf external training resources
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD,
+              training, examples
+
+******************
+Learning resources
+******************
+
+This section is a catalog of external resources and third-party content that
+can help you learn Omniperf. Some areas of the following content might be
+outdated.
+
+Introduction to Omniperf
+  :fab:`youtube` `AMD profiling workshop (Pawsey Supercomputing Research Centre) <https://www.youtube.com/watch?v=9AkxBCiInCw>`_
+
+Omniperf example exercises
+  `<https://github.com/amd/HPCTrainingExamples/tree/main/OmniperfExamples>`__
+
+AMD Instinct™ tuning guides
+  :doc:`rocm:how-to/tuning-guides/mi300x/workload`
+
diff --git a/projects/rocprofiler-compute/docs/tutorial/profiling-by-example.rst b/projects/rocprofiler-compute/docs/tutorial/profiling-by-example.rst
new file mode 100644
index 0000000000..8a9c85c03b
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/tutorial/profiling-by-example.rst
@@ -0,0 +1,23 @@
+.. meta::
+   :description: Omniperf: Profiling by example
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD
+
+********************
+Profiling by example
+********************
+
+The following examples refer to sample :doc:`HIP <hip:index>` code located in
+:fab:`github` :dev-sample:`ROCm/omniperf/blob/dev/sample <>` and distributed
+as part of Omniperf.
+
+.. include:: ./includes/valu-arithmetic-instruction-mix.rst
+
+.. include:: ./includes/infinity-fabric-transactions.rst
+
+.. include:: ./includes/vector-memory-operation-counting.rst
+
+.. include:: ./includes/instructions-per-cycle-and-utilizations.rst
+
+.. include:: ./includes/lds-examples.rst
+
+.. include:: ./includes/occupancy-limiters-example.rst
diff --git a/projects/rocprofiler-compute/docs/what-is-omniperf.rst b/projects/rocprofiler-compute/docs/what-is-omniperf.rst
new file mode 100644
index 0000000000..473be896ff
--- /dev/null
+++ b/projects/rocprofiler-compute/docs/what-is-omniperf.rst
@@ -0,0 +1,129 @@
+.. meta::
+   :description: What is Omniperf?
+   :keywords: Omniperf, ROCm, profiler, tool, Instinct, accelerator, AMD
+
+*****************
+What is Omniperf?
+*****************
+
+Omniperf is a kernel-level profiling tool for machine learning and high
+performance computing (HPC) workloads running on AMD Instinct™ accelerators.
+
+AMD Instinct MI-series accelerators are data center-class GPUs designed for
+compute and have some graphics capabilities disabled or removed. Omniperf
+primarily targets use with
+:doc:`accelerators in the MI300, MI200, and MI100 families <rocm:conceptual/gpu-arch>`.
+Development is in progress to support Radeon™ (RDNA) GPUs.
+
+Omniperf is built on top of :doc:`ROCProfiler <rocprofiler:rocprofv1>` to
+monitor hardware performance counters.
+
+.. _high-level-design:
+
+High-level design of Omniperf
+=============================
+
+The architecture of Omniperf consists of three major components shown in the
+following diagram.
+
+Core Omniperf profiler
+----------------------
+
+Acquires raw performance counters via application replay using ``rocprof``.
+Counters are stored in a comma-separated-values format for further
+:doc:`analysis <how-to/analyze/mode>`. It runs a set of accelerator-specific
+micro-benchmarks to acquire hierarchical roofline data. The roofline model is
+not available on accelerators pre-MI200.
+
+Grafana server for Omniperf
+---------------------------
+
+* **Grafana database import**: All raw performance counters are imported into
+  a :ref:`backend MongoDB database <grafana-mongodb-setup>` to support
+  analysis and visualization in the Grafana GUI. Compatibility with
+  previously generated data using older Omniperf versions is not guaranteed.
+
+* **Grafana analysis dashboard GUI**: The
+  :doc:`Grafana dashboard <how-to/analyze/grafana-gui>` retrieves the raw
+  counters information from the backend database. It displays the relevant
+  performance metrics and visualization.
+
+Omniperf standalone GUI analyzer
+--------------------------------
+
+Omniperf provides a :doc:`standalone GUI <how-to/analyze/standalone-gui>` to
+enable basic performance analysis without the need to import data into a
+database instance. Find setup instructions in :doc:`install/grafana-setup`.
+
+.. image:: data/install/omniperf_server_vs_client_install.png
+   :align: center
+   :alt: Architectural design of Omniperf
+   :width: 800
+
+Omniperf features
+=================
+
+Omniperf offers comprehensive profiling based on all available hardware counters
+for the target accelerator. It delivers advanced performance analysis features,
+such as system Speed-of-Light (SOL) and hardware block-level SOL evaluations.
+Additionally, Omniperf provides in-depth memory chart analysis, roofline
+analysis, baseline comparisons, and more, ensuring a thorough understanding of
+system performance.
+
+Omniperf supports analysis through either the :doc:`command line </how-to/analyze/cli>` or a
+:doc:`GUI </how-to/analyze/grafana-gui>`. The following list describes Omniperf's features at a
+high level.
+
+* :doc:`Support for AMD Instinct MI300, MI200, and MI100 accelerators <reference/compatible-accelerators>`
+
+* :doc:`Standalone GUI analyzer </how-to/analyze/standalone-gui>`
+
+* :doc:`GUI analyzer via Grafana and MongoDB </how-to/analyze/grafana-gui>`
+
+  * :ref:`System Info panel <grafana-panel-sys-info>`
+
+  * :ref:`Kernel Statistic panel <grafana-panel-kernel-stats>`
+
+  * :ref:`System Speed-of-Light panel <grafana-panel-system-sol>`
+
+  * :ref:`Memory Chart Analysis panel <grafana-panel-memory-chart-analysis>`
+
+  * :ref:`Roofline Analysis panel <grafana-panel-roofline-analysis>`
+    (*Supported on MI200 only, Ubuntu 20.04, SLES 15 SP3 or RHEL8*)
+
+  * :ref:`Command Processor (CP) panel <grafana-panel-cp>`
+
+  * :ref:`Workgroup Manager (SPI) panel <grafana-panel-spi>`
+
+  * :ref:`Wavefront Launch panel <grafana-panel-wavefront>`
+
+  * :ref:`Compute Unit - Instruction Mix panel <grafana-panel-cu-instruction-mix>`
+
+  * :ref:`Compute Unit - Pipeline panel <grafana-panel-cu-compute-pipeline>`
+
+  * :ref:`Local Data Share (LDS) panel <grafana-panel-lds>`
+
+  * :ref:`Instruction Cache panel <grafana-panel-instruction-cache>`
+
+  * :ref:`Scalar L1D Cache panel <grafana-panel-sl1d-cache>`
+
+  * :ref:`L1 Address Processing Unit, or, Texture Addresser (TA) <grafana-panel-ta>`
+    and :ref:`L1 Backend Data Processing Unit, or, Texture Data (TD) <grafana-panel-td>` panels
+
+  * :ref:`Vector L1D Cache panel <grafana-panel-vl1d>`
+
+  * :ref:`L2 Cache panel <grafana-panel-l2-cache>`
+
+  * :ref:`L2 Cache (per-channel) panel <grafana-panel-l2-cache-per-channel>`
+
+* :ref:`Filtering <filtering>` to reduce profiling time
+
+  * Filtering by dispatch
+
+  * Filtering by kernel
+
+  * Filtering by GPU ID
+
+* :ref:`Baseline comparisons <analysis-baseline-comparison>`
+
+* :ref:`Multiple normalizations <normalization-units>`