diff --git a/CHANGELOG.md b/CHANGELOG.md
index dcfc5d1f67..71e1af9296 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,11 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
 
 * Add Docker files to package the application and dependencies into a single portable and executable standalone binary file
 
+* Analysis report based filtering
+  * -b option in profile mode now additionally accepts metric id(s) for analysis report based filtering
+  * -b option in profile mode also accepts hardware IP blocks for filtering; however, this support will be deprecated soon
+  * --list-metrics option added in profile mode to list possible metric id(s), similar to analyze mode
+
 ### Changed
 
 * Change normal_unit default to per_kernel
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2ca926314..39b4627be8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -244,6 +244,13 @@ add_test(
             ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
 
+# Run the pytest cases marked "section"; results go to their own JUnit report file.
+add_test(
+    NAME test_profile_section
+    COMMAND ${Python3_EXECUTABLE} -m pytest -m section --junitxml=tests/test_profile_section.xml
+            ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+
 set_tests_properties(
     test_profile_kernel_execution
     test_profile_ipblocks
diff --git a/docs/how-to/profile/mode.rst b/docs/how-to/profile/mode.rst
index d6b9c7ce89..0d6c72434e 100644
--- a/docs/how-to/profile/mode.rst
+++ b/docs/how-to/profile/mode.rst
@@ -230,7 +230,7 @@ Filtering options
 -----------------
 
 ``-b``, ``--block <block-name>``
-   Allows system profiling on one or more selected hardware components to speed
+   Allows system profiling on one or more selected hardware report blocks to speed
    up the profiling process. See :ref:`profiling-hw-component-filtering`.
 
 ``-k``, ``--kernel <kernel-substr>``
@@ -251,21 +251,91 @@ Filtering options
 
 .. _profiling-hw-component-filtering:
 
-Hardware component filtering
+Hardware report block filtering
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-You can profile specific hardware components to speed up the profiling process.
-In ROCm Compute Profiler, the term hardware block to refers to a hardware component or a
-group of hardware components. All profiling results are accumulated in the same
-target directory without overwriting those for other hardware components. This
-enables incremental profiling and analysis.
+You can profile specific hardware report blocks to speed up the profiling process.
+In ROCm Compute Profiler, the term hardware report block refers to a section of the
+analysis report which focuses on metrics associated with a hardware component or
+a group of hardware components. All profiling results are accumulated in the same
+target directory without overwriting those for other hardware components.
+This enables incremental profiling and analysis.
 
-The following example only gathers hardware counters for the shader sequencer
-(SQ) and L2 cache (TCC) components, skipping all other hardware components.
+The following example only gathers hardware counters used to calculate metrics
+for ``Compute Unit - Instruction Mix`` (block 10) and ``Wavefront Launch Statistics``
+(block 7) sections of the analysis report, while skipping over all other hardware counters.
 
 .. code-block:: shell-session
 
-   $ rocprof-compute profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256
+   $ rocprof-compute profile --name vcopy -b 10 7 -- ./vcopy -n 1048576 -b 256
+
+                                    __                                       _
+    _ __ ___   ___ _ __  _ __ ___  / _|       ___ ___  _ __ ___  _ __  _   _| |_ ___
+   | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
+   | | | (_) | (__| |_) | | | (_) |  _|_____| (_| (_) | | | | | | |_) | |_| | ||  __/
+   |_|  \___/ \___| .__/|_|  \___/|_|        \___\___/|_| |_| |_| .__/ \__,_|\__\___|
+                  |_|                                           |_|
+
+   rocprofiler-compute version: 2.0.0
+   Profiler choice: rocprofv1
+   Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200
+   Target: MI200
+   Command: ./vcopy -n 1048576 -b 256
+   Kernel Selection: None
+   Dispatch Selection: None
+   Hardware Blocks: []
+   Report Sections: ['10', '7']
+
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   Collecting Performance Counters
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   ...
+
+
+To see a list of available hardware report blocks, use the ``--list-metrics`` option.
+
+.. code-block:: shell-session
+
+   $ rocprof-compute profile --list-metrics
+
+                                    __                                       _
+    _ __ ___   ___ _ __  _ __ ___  / _|       ___ ___  _ __ ___  _ __  _   _| |_ ___
+   | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
+   | | | (_) | (__| |_) | | | (_) |  _|_____| (_| (_) | | | | | | |_) | |_| | ||  __/
+   |_|  \___/ \___| .__/|_|  \___/|_|        \___\___/|_| |_| |_| .__/ \__,_|\__\___|
+                  |_|                                           |_|
+
+   0 -> Top Stats
+   1 -> System Info
+   2 -> System Speed-of-Light
+         2.1 -> Speed-of-Light
+                  2.1.0 -> VALU FLOPs
+                  2.1.1 -> VALU IOPs
+                  2.1.2 -> MFMA FLOPs (F8)
+   ...
+   5 -> Command Processor (CPC/CPF)
+         5.1 -> Command Processor Fetcher
+                  5.1.0 -> CPF Utilization
+                  5.1.1 -> CPF Stall
+                  5.1.2 -> CPF-L2 Utilization
+         5.2 -> Packet Processor
+                  5.2.0 -> CPC Utilization
+                  5.2.1 -> CPC Stall Rate
+                  5.2.5 -> CPC-UTCL1 Stall
+   ...
+   6 -> Workgroup Manager (SPI)
+         6.1 -> Workgroup Manager Utilizations
+                  6.1.0 -> Accelerator Utilization
+                  6.1.1 -> Scheduler-Pipe Utilization
+                  6.1.2 -> Workgroup Manager Utilization
+
+
+It is also possible to filter counter collection by hardware component such as Shader Sequencer (SQ)
+and L2 cache (TCC) as shown below.
+
+.. code-block:: shell-session
+
+   $ rocprof-compute profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256
 
                                     __                                       _
     _ __ ___   ___ _ __  _ __ ___  / _|       ___ ___  _ __ ___  _ __  _   _| |_ ___
@@ -297,12 +367,18 @@ The following example only gathers hardware counters for the shader sequencer
    Kernel Selection: None
    Dispatch Selection: None
    Hardware Blocks: ['sq', 'tcc']
+   Report Sections: []
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Collecting Performance Counters
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ...
 
+.. warning::
+
+   Filtering by hardware components (e.g. SQ, TCC) will soon be deprecated.
+   It is recommended to use hardware report block based filtering.
+
 .. _profiling-kernel-filtering:
 
 Kernel filtering
diff --git a/docs/how-to/use.rst b/docs/how-to/use.rst
index e138e5a41b..4ac2b6bd1d 100644
--- a/docs/how-to/use.rst
+++ b/docs/how-to/use.rst
@@ -57,17 +57,17 @@ Common filters to customize data collection include:
    Enables filtering based on dispatch ID.
 
 ``-b``, ``--block``
-   Enables collection metrics for only the specified (one or more) hardware
-   component blocks.
+   Enables metric collection for only the specified hardware report blocks.
 
 See :ref:`Filtering <filtering>` for an in-depth walkthrough.
 
-To view available metrics by hardware block, use the ``--list-metrics``
-argument:
+To view available metrics by hardware block, use the ``profile`` mode ``--list-metrics``
+option with an optional system architecture argument (inferred if not provided):
 
 .. code-block:: shell
 
-   $ rocprof-compute analyze --list-metrics <sys_arch>
+   $ rocprof-compute profile --list-metrics
+   $ rocprof-compute profile --list-metrics <sys_arch>
 
 .. _basic-analyze-cli:
 
@@ -80,7 +80,7 @@ interface with profiling results. View different metrics derived from your
 profiled results and get immediate access all metrics organized by hardware
 blocks.
 
-If you don't apply kernel, dispatch, or hardware block filters at this stage,
+If you don't apply kernel, dispatch, or hardware report block filters at this stage,
 analysis is reflective of the entirety of the profiling data.
 
 To interact with profiling results from a different session, provide the
diff --git a/pyproject.toml b/pyproject.toml
index 2308351d8e..a5cfb0c168 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,6 +50,7 @@ pythonpath = [
     ]
 
 markers = [
+	"section",
 	"kernel_execution",
 	"block",
 	"misc",
diff --git a/src/argparser.py b/src/argparser.py
index d412063124..ce879a2905 100644
--- a/src/argparser.py
+++ b/src/argparser.py
@@ -24,14 +24,15 @@
 
 import argparse
 import os
+import re
 import shutil
 from pathlib import Path
 
 
 def print_avail_arch(avail_arch: list):
-    ret_str = "\t\tList all available metrics for analysis on specified arch:"
+    ret_str = "\t\t\tList all available metrics for analysis on specified arch:"
     for arch in avail_arch:
-        ret_str += "\n\t\t   {}".format(arch)
+        ret_str += "\n\t\t\t   {}".format(arch)
     return ret_str
 
 
@@ -114,7 +115,6 @@ Examples:
         type=str,
         metavar="",
         dest="name",
-        required=True,
         help="\t\t\tAssign a name to workload.",
     )
     profile_group.add_argument("--target", type=str, default=None, help=argparse.SUPPRESS)
@@ -154,7 +154,7 @@ Examples:
         default=False,
         action="store_true",
         help=argparse.SUPPRESS,
-        #help="\t\t\tKokkos trace, traces Kokkos API calls.",
+        # help="\t\t\tKokkos trace, traces Kokkos API calls.",
     )
     profile_group.add_argument(
         "-k",
@@ -177,16 +177,67 @@ Examples:
         required=False,
         help="\t\t\tDispatch ID filtering.",
     )
+
+    class AggregateDict(argparse.Action):
+        """Merge the (key, value) pairs of every occurrence of an option into one dict."""
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            # Copy first: the namespace may still hold the option's shared default dict
+            aggregated_dict = dict(getattr(namespace, self.dest, None) or {})
+            aggregated_dict.update(values)
+            setattr(namespace, self.dest, aggregated_dict)
+
+    def validate_block(value):
+        """Classify a -b/--block token as a (value, "metric_id" | "hardware_block") pair."""
+        # Metric ids: a section ("10") or section.subsection with 1-2 digits ("4.3", "10.32")
+        metric_id_pattern = re.compile(r"^\d+(\.\d{1,2})?$")
+        # Allow only the following hardware blocks
+        hardware_block_pattern = re.compile(r"^(SQ|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF)$")
+        if metric_id_pattern.match(value):
+            return (str(value), "metric_id")
+        if hardware_block_pattern.match(value):
+            return (str(value), "hardware_block")
+        raise argparse.ArgumentTypeError(f"Invalid hardware block or metric id: {value}")
+
     profile_group.add_argument(
         "-b",
         "--block",
-        type=str,
-        dest="ipblocks",
+        type=validate_block,
+        action=AggregateDict,
+        dest="filter_blocks",
         metavar="",
         nargs="+",
         required=False,
-        choices=["SQ", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"],
-        help="\t\t\tHardware block filtering:\n\t\t\t   SQ\n\t\t\t   SQC\n\t\t\t   TA\n\t\t\t   TD\n\t\t\t   TCP\n\t\t\t   TCC\n\t\t\t   SPI\n\t\t\t   CPC\n\t\t\t   CPF",
+        default={},
+        help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 10, 4, 4.3).
+    \t\t\tCan provide multiple space separated arguments.
+    \t\t\tCan also accept Hardware blocks.
+    \t\t\tHardware block filtering (to be deprecated soon):
+    \t\t\t   SQ
+    \t\t\t   SQC
+    \t\t\t   TA
+    \t\t\t   TD
+    \t\t\t   TCP
+    \t\t\t   TCC
+    \t\t\t   SPI
+    \t\t\t   CPC
+    \t\t\t   CPF""",
+    )
+    profile_group.add_argument(
+        "--list-metrics",
+        metavar="",
+        nargs="?",
+        const="",
+        # Argument to --list-metrics is optional
+        choices=[""] + list(supported_archs.keys()),  # ["gfx906", "gfx908", "gfx90a"],
+        help=print_avail_arch(supported_archs.keys()),
+    )
+    profile_group.add_argument(
+        "--config-dir",
+        dest="config_dir",
+        metavar="",
+        help="\t\t\tSpecify the directory of customized report section configs.",
+        default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"),
     )
 
     result = shutil.which("rocscope")
@@ -487,7 +538,7 @@ Examples:
         dest="filter_metrics",
         metavar="",
         nargs="+",
-        help="\t\tSpecify hardware block/metric id(s) from --list-metrics for filtering.",
+        help="\t\tSpecify metric id(s) from --list-metrics for filtering.",
     )
     analyze_group.add_argument(
         "--gpu-id",
diff --git a/src/rocprof_compute_analyze/analysis_base.py b/src/rocprof_compute_analyze/analysis_base.py
index 51e7778d79..5591c1efe1 100644
--- a/src/rocprof_compute_analyze/analysis_base.py
+++ b/src/rocprof_compute_analyze/analysis_base.py
@@ -45,6 +45,7 @@ class OmniAnalyze_Base:
         self.__args = args
         self._runs = OrderedDict()
         self._arch_configs = {}
+        self._profiling_config = dict()
         self.__supported_archs = supported_archs
         self._output = None
         self.__socs: dict = None  # available OmniSoC objs
@@ -254,6 +255,9 @@ class OmniAnalyze_Base:
             open(self.__args.output_file, "w+") if self.__args.output_file else sys.stdout
         )
 
+        # Read profiling config
+        self._profiling_config = file_io.load_profiling_config(self.__args.path[0][0])
+
         # initalize runs
         self._runs = self.initalize_runs()
 
diff --git a/src/rocprof_compute_analyze/analysis_cli.py b/src/rocprof_compute_analyze/analysis_cli.py
index facfad01f2..b370c070f0 100644
--- a/src/rocprof_compute_analyze/analysis_cli.py
+++ b/src/rocprof_compute_analyze/analysis_cli.py
@@ -100,4 +100,5 @@ class cli_analysis(OmniAnalyze_Base):
                     self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_arch"]
                 ],
                 self._output,
+                self._profiling_config,
             )
diff --git a/src/rocprof_compute_base.py b/src/rocprof_compute_base.py
index c407482331..cd97676fde 100644
--- a/src/rocprof_compute_base.py
+++ b/src/rocprof_compute_base.py
@@ -33,10 +33,11 @@ import time
 from pathlib import Path
 
 import pandas as pd
+import yaml
 
 import config
 from argparser import omniarg_parser
-from utils import file_io
+from utils import file_io, parser, schema
 from utils.logger import (
     setup_console_handler,
     setup_file_handler,
@@ -47,6 +48,7 @@ from utils.utils import (
     console_debug,
     console_error,
     console_log,
+    console_warning,
     demarcate,
     detect_rocprof,
     get_submodules,
@@ -230,11 +232,50 @@ class RocProfCompute:
 
         return
 
+    @demarcate
+    def list_metrics(self):
+        """Print the metric ids of one arch, indented by nesting depth, then exit.
+
+        The arch is the --list-metrics value; when that is empty, the arch from the
+        machine specs is used. An unsupported arch is reported via console_error.
+        """
+        # One tab per "." in the metric id, capped at two tabs
+        indent_by_depth = ("", "\t", "\t\t")
+        requested_arch = self.__args.list_metrics
+        arch = requested_arch if requested_arch else self.__mspec.gpu_arch
+        if arch not in self.__supported_archs.keys():
+            console_error("Unsupported arch")
+            return
+        arch_config = schema.ArchConfig()
+        arch_config.panel_configs = file_io.load_panel_configs(
+            self.__args.config_dir.joinpath(arch)
+        )
+        sys_info = self.__mspec.get_class_members().iloc[0]
+        parser.build_dfs(archConfigs=arch_config, filter_metrics=[], sys_info=sys_info)
+        for metric_id, metric_name in arch_config.metric_list.items():
+            depth = min(str(metric_id).count("."), 2)
+            print(indent_by_depth[depth] + metric_id, "->", metric_name)
+        # Listing metrics is all that was asked for; stop before any profiling starts
+        sys.exit(0)
+
     @demarcate
     def run_profiler(self):
         self.print_graphic()
         self.load_soc_specs()
 
+        if self.__args.list_metrics is not None:
+            self.list_metrics()
+        elif self.__args.name is None:
+            sys.exit("Either --list-metrics or --name is required")
+
+        # Deprecation warning for hardware blocks
+        if [
+            name
+            for name, type in self.__args.filter_blocks.items()
+            if type == "hardware_block"
+        ]:
+            console_warning("Hardware block based filtering will be deprecated soon")
+
         # FIXME:
         #     Changing default path should be done at the end of arg parsing stage,
         #     unless there is a specific reason to do here.
@@ -250,25 +291,37 @@ class RocProfCompute:
             from rocprof_compute_profile.profiler_rocprof_v1 import rocprof_v1_profiler
 
             profiler = rocprof_v1_profiler(
-                self.__args, self.__profiler_mode, self.__soc[self.__mspec.gpu_arch]
+                self.__args,
+                self.__profiler_mode,
+                self.__soc[self.__mspec.gpu_arch],
+                self.__supported_archs,
             )
         elif self.__profiler_mode == "rocprofv2":
             from rocprof_compute_profile.profiler_rocprof_v2 import rocprof_v2_profiler
 
             profiler = rocprof_v2_profiler(
-                self.__args, self.__profiler_mode, self.__soc[self.__mspec.gpu_arch]
+                self.__args,
+                self.__profiler_mode,
+                self.__soc[self.__mspec.gpu_arch],
+                self.__supported_archs,
             )
         elif self.__profiler_mode == "rocprofv3":
             from rocprof_compute_profile.profiler_rocprof_v3 import rocprof_v3_profiler
 
             profiler = rocprof_v3_profiler(
-                self.__args, self.__profiler_mode, self.__soc[self.__mspec.gpu_arch]
+                self.__args,
+                self.__profiler_mode,
+                self.__soc[self.__mspec.gpu_arch],
+                self.__supported_archs,
             )
         elif self.__profiler_mode == "rocscope":
             from rocprof_compute_profile.profiler_rocscope import rocscope_profiler
 
             profiler = rocscope_profiler(
-                self.__args, self.__profiler_mode, self.__soc[self.__mspec.gpu_arch]
+                self.__args,
+                self.__profiler_mode,
+                self.__soc[self.__mspec.gpu_arch],
+                self.__supported_archs,
             )
         else:
             console_error("Unsupported profiler")
@@ -278,6 +331,11 @@ class RocProfCompute:
         # -----------------------
 
         self.__soc[self.__mspec.gpu_arch].profiling_setup()
+        # Write a copy of the arguments as yaml; vars() alone would alias self.__args
+        with open(Path(self.__args.path).joinpath("profiling_config.yaml"), "w") as f:
+            args_dict = dict(vars(self.__args))
+            args_dict["config_dir"] = str(args_dict["config_dir"])
+            yaml.dump(args_dict, f)
         # enable file-based logging
         setup_file_handler(self.__args.loglevel, self.__args.path)
 
diff --git a/src/rocprof_compute_profile/profiler_base.py b/src/rocprof_compute_profile/profiler_base.py
index d60340252d..23ac4580b7 100644
--- a/src/rocprof_compute_profile/profiler_base.py
+++ b/src/rocprof_compute_profile/profiler_base.py
@@ -27,7 +27,6 @@ import logging
 import os
 import re
 import shutil
-import sys
 import time
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -51,15 +50,22 @@ from utils.utils import (
 
 
 class RocProfCompute_Base:
-    def __init__(self, args, profiler_mode, soc):
+    def __init__(self, args, profiler_mode, soc, supported_archs):
         self.__args = args
         self.__profiler = profiler_mode
+        self.__supported_archs = supported_archs
         self._soc = soc  # OmniSoC obj
         self.__perfmon_dir = str(
             Path(str(config.rocprof_compute_home)).joinpath(
                 "rocprof_compute_soc", "profile_configs"
             )
         )
+        self.__filter_hardware_blocks = [
+            name for name, type in args.filter_blocks.items() if type == "hardware_block"
+        ]
+        self.__filter_metric_ids = [
+            name for name, type in args.filter_blocks.items() if type == "metric_id"
+        ]
 
     def get_args(self):
         return self.__args
@@ -320,10 +326,14 @@ class RocProfCompute_Base:
         console_log("Command: " + str(self.__args.remaining))
         console_log("Kernel Selection: " + str(self.__args.kernel))
         console_log("Dispatch Selection: " + str(self.__args.dispatch))
-        if self.__args.ipblocks == None:
+        if self.__filter_hardware_blocks == None:
             console_log("Hardware Blocks: All")
         else:
-            console_log("Hardware Blocks: " + str(self.__args.ipblocks))
+            console_log("Hardware Blocks: " + str(self.__filter_hardware_blocks))
+        if self.__filter_metric_ids == None:
+            console_log("Report Sections: All")
+        else:
+            console_log("Report Sections: " + str(self.__filter_metric_ids))
 
         msg = "Collecting Performance Counters"
         (
@@ -424,7 +434,11 @@ class RocProfCompute_Base:
         gen_sysinfo(
             workload_name=self.__args.name,
             workload_dir=self.get_args().path,
-            ip_blocks=self.__args.ipblocks,
+            ip_blocks=[
+                name
+                for name, type in self.__args.filter_blocks.items()
+                if type == "hardware_block"
+            ],
             app_cmd=self.__args.remaining,
             skip_roof=self.__args.no_roof,
             roof_only=self.__args.roof_only,
diff --git a/src/rocprof_compute_profile/profiler_rocprof_v1.py b/src/rocprof_compute_profile/profiler_rocprof_v1.py
index e4fa44cdfd..53e54b70fc 100644
--- a/src/rocprof_compute_profile/profiler_rocprof_v1.py
+++ b/src/rocprof_compute_profile/profiler_rocprof_v1.py
@@ -30,8 +30,8 @@ from utils.utils import console_log, demarcate, replace_timestamps, store_app_cm
 
 
 class rocprof_v1_profiler(RocProfCompute_Base):
-    def __init__(self, profiling_args, profiler_mode, soc):
-        super().__init__(profiling_args, profiler_mode, soc)
+    def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
+        super().__init__(profiling_args, profiler_mode, soc, supported_archs)
         self.ready_to_profile = (
             self.get_args().roof_only
             and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
diff --git a/src/rocprof_compute_profile/profiler_rocprof_v2.py b/src/rocprof_compute_profile/profiler_rocprof_v2.py
index 4ed8b2ba9f..4b04f92e64 100644
--- a/src/rocprof_compute_profile/profiler_rocprof_v2.py
+++ b/src/rocprof_compute_profile/profiler_rocprof_v2.py
@@ -31,8 +31,8 @@ from utils.utils import console_log, demarcate, replace_timestamps, store_app_cm
 
 
 class rocprof_v2_profiler(RocProfCompute_Base):
-    def __init__(self, profiling_args, profiler_mode, soc):
-        super().__init__(profiling_args, profiler_mode, soc)
+    def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
+        super().__init__(profiling_args, profiler_mode, soc, supported_archs)
         self.ready_to_profile = (
             self.get_args().roof_only
             and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
diff --git a/src/rocprof_compute_profile/profiler_rocprof_v3.py b/src/rocprof_compute_profile/profiler_rocprof_v3.py
index af4c3b1022..da14ff2e5c 100644
--- a/src/rocprof_compute_profile/profiler_rocprof_v3.py
+++ b/src/rocprof_compute_profile/profiler_rocprof_v3.py
@@ -32,8 +32,8 @@ from utils.utils import console_error, console_log, demarcate, replace_timestamp
 
 
 class rocprof_v3_profiler(RocProfCompute_Base):
-    def __init__(self, profiling_args, profiler_mode, soc):
-        super().__init__(profiling_args, profiler_mode, soc)
+    def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
+        super().__init__(profiling_args, profiler_mode, soc, supported_archs)
         self.ready_to_profile = (
             self.get_args().roof_only
             and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
diff --git a/src/rocprof_compute_profile/profiler_rocscope.py b/src/rocprof_compute_profile/profiler_rocscope.py
index 761e931dab..81d134c0e7 100644
--- a/src/rocprof_compute_profile/profiler_rocscope.py
+++ b/src/rocprof_compute_profile/profiler_rocscope.py
@@ -27,8 +27,8 @@ from utils.utils import console_log, demarcate
 
 
 class rocscope_profiler(RocProfCompute_Base):
-    def __init__(self, profiling_args, profiler_mode, soc):
-        super().__init__(profiling_args, profiler_mode, soc)
+    def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
+        super().__init__(profiling_args, profiler_mode, soc, supported_archs)
 
     # -----------------------
     # Required child methods
diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/0400_roofline_chart.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/0400_roofline_chart.yaml
deleted file mode 100644
index f1f87c5c9f..0000000000
--- a/src/rocprof_compute_soc/analysis_configs/gfx90a/0400_roofline_chart.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
----
-Panel Config:
-  id: 400
-  title: Roofline
-  data source:
-    - raw_csv_table:
-        id: 401
-        source: roofline.csv
-        comparable: false # for now
-        cli_style: roofline_chart
-        # TODO: refactoring the data structure to have metrics here!
diff --git a/src/rocprof_compute_soc/soc_base.py b/src/rocprof_compute_soc/soc_base.py
index 88a2a10415..ff6eff4eb8 100644
--- a/src/rocprof_compute_soc/soc_base.py
+++ b/src/rocprof_compute_soc/soc_base.py
@@ -32,9 +32,17 @@ from collections import OrderedDict
 from pathlib import Path
 
 import numpy as np
+import yaml
 
 from rocprof_compute_base import MI300_CHIP_IDS, SUPPORTED_ARCHS
-from utils.utils import console_debug, console_error, console_log, demarcate
+from utils.parser import build_in_vars, supported_denom
+from utils.utils import (
+    console_debug,
+    console_error,
+    console_log,
+    convert_metric_id_to_panel_idx,
+    demarcate,
+)
 
 
 class OmniSoC_Base:
@@ -48,19 +56,10 @@ class OmniSoC_Base:
         self.__perfmon_config = (
             {}
         )  # Per IP block max number of simulutaneous counters. GFX IP Blocks
+        self.__section_counters = set()  # hw counters corresponding to filtered sections
         self.__soc_params = {}  # SoC specifications
         self.__compatible_profilers = []  # Store profilers compatible with SoC
         self.populate_mspec()
-        # In some cases (i.e. --specs) path will not be given
-        if hasattr(self.__args, "path"):
-            if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
-                self.__workload_dir = str(
-                    Path(self.__args.path).joinpath(
-                        self.__args.name, self._mspec.gpu_model
-                    )
-                )
-            else:
-                self.__workload_dir = self.__args.path
 
     def __hash__(self):
         return hash(self.__arch)
@@ -189,6 +188,47 @@ class OmniSoC_Base:
             total_xcds(self._mspec.gpu_model, self._mspec.compute_partition)
         )
 
+    @demarcate
+    def section_filter(self):
+        """
+        Create a set of counters required for the selected report sections.
+        Parse analysis report configuration files based on the selected report sections to be filtered.
+        A section without a matching configuration file is reported with console_error and skipped.
+        """
+        config_dir = Path(self.__args.config_dir).joinpath(self.__arch)
+        for section in self.__filter_metric_ids:
+            section_num = convert_metric_id_to_panel_idx(section)
+            # Match config files whose name starts with the zero padded section: "4" -> "04"
+            file_id = str(section_num // 100).zfill(2)
+            # Sorted so the selected file does not depend on directory listing order
+            config_filenames = sorted(
+                filename
+                for filename in os.listdir(config_dir)
+                if filename.endswith(".yaml") and filename.startswith(file_id)
+            )
+            if not config_filenames:
+                console_error(f"No analysis config found for report section {section}")
+                continue
+            # Read the yaml file
+            with open(config_dir.joinpath(config_filenames[0]), "r") as stream:
+                section_config = yaml.safe_load(stream)
+            # Extract subsection if section is of the form 4.52
+            if section_num % 100:
+                section_config_text = "\n".join(
+                    # Convert yaml to string. Entries that are not a metric_table
+                    # (e.g. raw_csv_table) carry no matching id and are left out
+                    yaml.dump(subsection)
+                    for subsection in section_config["Panel Config"]["data source"]
+                    if subsection.get("metric_table", {}).get("id") == section_num
+                )
+            else:
+                # Convert yaml to string
+                section_config_text = yaml.dump(section_config)
+            # Accumulate the counters of every selected section
+            self.__section_counters = self.__section_counters.union(
+                parse_counters(section_config_text)
+            )
+
     @demarcate
     def perfmon_filter(self, roofline_perfmon_only: bool):
         """Filter default performance counter set based on user arguments"""
@@ -197,15 +237,40 @@ class OmniSoC_Base:
             and Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
         ):
             return
-        workload_perfmon_dir = self.__workload_dir + "/perfmon"
+
+        # In some cases (i.e. --specs) path will not be given
+        if hasattr(self.__args, "path"):
+            if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
+                workload_dir = str(
+                    Path(self.__args.path).joinpath(
+                        self.__args.name, self._mspec.gpu_model
+                    )
+                )
+            else:
+                workload_dir = self.__args.path
+
+        workload_perfmon_dir = workload_dir + "/perfmon"
+
+        self.__filter_hardware_blocks = [
+            name
+            for name, type in self.get_args().filter_blocks.items()
+            if type == "hardware_block"
+        ]
+        self.__filter_metric_ids = [
+            name
+            for name, type in self.get_args().filter_blocks.items()
+            if type == "metric_id"
+        ]
+
+        self.section_filter()
 
         # Initialize directories
-        if not Path(self.__workload_dir).is_dir():
-            os.makedirs(self.__workload_dir)
-        elif not Path(self.__workload_dir).is_symlink():
-            shutil.rmtree(self.__workload_dir)
+        if not Path(workload_dir).is_dir():
+            os.makedirs(workload_dir)
+        elif not Path(workload_dir).is_symlink():
+            shutil.rmtree(workload_dir)
         else:
-            os.unlink(self.__workload_dir)
+            os.unlink(workload_dir)
 
         os.makedirs(workload_perfmon_dir)
 
@@ -216,16 +281,17 @@ class OmniSoC_Base:
             )
 
             # Perfmon list filtering
-            if self.__args.ipblocks != None:
-                for i in range(len(self.__args.ipblocks)):
-                    self.__args.ipblocks[i] = self.__args.ipblocks[i].lower()
+            if self.__filter_hardware_blocks:
+                hardware_blocks = [
+                    block.lower() for block in self.__filter_hardware_blocks
+                ]
                 mpattern = "pmc_([a-zA-Z0-9_]+)_perf*"
 
                 pmc_files_list = []
                 for fname in ref_pmc_files_list:
                     fbase = Path(fname).stem
                     ip = re.match(mpattern, fbase).group(1)
-                    if ip in self.__args.ipblocks:
+                    if ip in hardware_blocks:
                         pmc_files_list.append(fname)
                         console_log("fname: " + fbase + ": Added")
                     else:
@@ -242,8 +308,9 @@ class OmniSoC_Base:
         perfmon_coalesce(
             pmc_files_list,
             self.__perfmon_config,
-            self.__workload_dir,
+            workload_dir,
             self.get_args().spatial_multiplexing,
+            self.__section_counters,
         )
 
     # ----------------------------------------------------
@@ -310,7 +377,38 @@ def using_v3():
 
 
 @demarcate
-def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir, spatial_multiplexing):
+def parse_counters(config_text):
+    """
+    Return a list of the unique hardware counters that config_text references, directly or through $variables
+    """
+    # hw counter name should start with ip block name
+    hw_counter_regex = r"(?:SQ|SQC|TA|TD|TCP|TCC|CPC|CPF|SPI|GRBM)_[0-9A-Za-z_]+"
+    # only capture the variable name after $ using capturing group
+    variable_regex = r"\$([0-9A-Za-z_]+)"
+    hw_counter_matches = set(re.findall(hw_counter_regex, config_text))
+    variable_matches = set(re.findall(variable_regex, config_text))
+    # get hw counters and variables for all supported denominators
+    for formula in supported_denom.values():
+        hw_counter_matches.update(re.findall(hw_counter_regex, formula))
+        variable_matches.update(re.findall(variable_regex, formula))
+    # expand variables level by level; `expanded` keeps cyclic definitions from looping forever
+    expanded = set()
+    while variable_matches:
+        expanded |= variable_matches
+        subvariable_matches = set()
+        for var in variable_matches:
+            if var in build_in_vars:
+                formula = build_in_vars[var]
+                hw_counter_matches.update(re.findall(hw_counter_regex, formula))
+                subvariable_matches.update(re.findall(variable_regex, formula))
+        variable_matches = subvariable_matches - expanded
+    return list(hw_counter_matches)
+
+
+@demarcate
+def perfmon_coalesce(
+    pmc_files_list, perfmon_config, workload_dir, spatial_multiplexing, section_counters
+):
     """Sort and bucket all related performance counters to minimize required application passes"""
     workload_perfmon_dir = workload_dir + "/perfmon"
 
@@ -388,6 +486,49 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir, spatial_multi
             if accu in normal_counters:
                 del normal_counters[accu]
 
+    # If section report filters have been provided, only collect the counters needed by those section reports.
+    # Counter names are compared with a trailing _sum or _expand suffix removed.
+    def remove_suffixes(string):
+        for suffix in ["_sum", "_expand"]:
+            if string.endswith(suffix):
+                string = string[: -len(suffix)]
+                break
+        return string
+
+    section_counters = {remove_suffixes(counter) for counter in section_counters}
+    ignored_counters = list()
+
+    if section_counters:
+        # Remove unnecessary normal counters
+        for counter_name in list(normal_counters.keys()):
+            if remove_suffixes(counter_name) not in section_counters:
+                del normal_counters[counter_name]
+                ignored_counters.append(counter_name)
+
+        # Keep an accumulate group when any of its counters is needed, else report the whole group as ignored
+        filtered_accumulate_counters = list()
+        for counters in accumulate_counters:
+            if any(
+                remove_suffixes(group_counter) in section_counters
+                for group_counter in counters
+            ):
+                filtered_accumulate_counters.append(counters)
+            else:
+                ignored_counters.extend(counters)
+        accumulate_counters = filtered_accumulate_counters
+
+    if ignored_counters:
+        console_log(
+            f"Not collecting following counters per provided filter: {', '.join(ignored_counters)} "
+        )
+
+    # Throw error if no counters to be collected
+    if len(normal_counters) == 0 and len(accumulate_counters) == 0:
+        console_error(
+            "profiling",
+            "No performance counters to collect, please check the provided profiling filters",
+        )
+
     output_files = []
 
     accu_file_count = 0
diff --git a/src/roofline.py b/src/roofline.py
index bd76480dab..e83f15b85a 100644
--- a/src/roofline.py
+++ b/src/roofline.py
@@ -25,9 +25,11 @@
 import os
 import time
 from abc import ABC, abstractmethod
+from collections import OrderedDict
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 import plotly.graph_objects as go
 from dash import dcc, html
 
@@ -75,12 +77,6 @@ class Roofline:
         if hasattr(self.__args, "sort") and self.__args.sort != "ALL":
             self.__run_parameters["sort_type"] = self.__args.sort
 
-        if (
-            not isinstance(self.__run_parameters["workload_dir"], list)
-            and self.__run_parameters["workload_dir"] != None
-        ):
-            self.roof_setup()
-
         self.validate_parameters()
 
     def validate_parameters(self):
@@ -110,6 +106,12 @@ class Roofline:
         ret_df,
     ):
         """Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data"""
+        if (
+            not isinstance(self.__run_parameters["workload_dir"], list)
+            and self.__run_parameters["workload_dir"] != None
+        ):
+            self.roof_setup()
+
         # Create arithmetic intensity data that will populate the roofline model
         console_debug("roofline", "Path: %s" % self.__run_parameters["workload_dir"])
         self.__ai_data = calc_ai(self.__mspec, self.__run_parameters["sort_type"], ret_df)
@@ -375,9 +377,11 @@ class Roofline:
 
     @demarcate
     def standalone_roofline(self):
-        from collections import OrderedDict
-
-        import pandas as pd
+        if (
+            not isinstance(self.__run_parameters["workload_dir"], list)
+            and self.__run_parameters["workload_dir"] != None
+        ):
+            self.roof_setup()
 
         # Change vL1D to a interpretable str, if required
         if "vL1D" in self.__run_parameters["mem_level"]:
@@ -394,32 +398,6 @@ class Roofline:
         t_df["pmc_perf"] = pd.read_csv(app_path)
         self.empirical_roofline(ret_df=t_df)
 
-    # Main methods
-    @abstractmethod
-    def pre_processing(self):
-        if self.__args.roof_only:
-            # check for sysinfo
-            console_log(
-                "roofline", "Checking for sysinfo.csv in " + str(self.__args.path)
-            )
-            sysinfo_path = str(Path(self.__args.path).joinpath("sysinfo.csv"))
-            if not Path(sysinfo_path).is_file():
-                console_log("roofline", "sysinfo.csv not found. Generating...")
-
-                class Dummy_SoC:
-                    roofline_obj = True
-
-                gen_sysinfo(
-                    workload_name=self.__args.name,
-                    workload_dir=self.__workload_dir,
-                    ip_blocks=self.__args.ipblocks,
-                    app_cmd=self.__args.remaining,
-                    skip_roof=self.__args.no_roof,
-                    roof_only=self.__args.roof_only,
-                    mspec=self.__mspec,
-                    soc=Dummy_SoC,
-                )
-
     @abstractmethod
     def profile(self):
         if self.__args.roof_only:
diff --git a/src/utils/file_io.py b/src/utils/file_io.py
index 6de537def9..982a3d85e5 100644
--- a/src/utils/file_io.py
+++ b/src/utils/file_io.py
@@ -36,7 +36,7 @@ import yaml
 import config
 from utils import schema
 from utils.kernel_name_shortener import kernel_name_shortener
-from utils.utils import console_debug, console_error, demarcate
+from utils.utils import console_debug, console_error, console_log, demarcate
 
 # TODO: use pandas chunksize or dask to read really large csv file
 # from dask import dataframe as dd
@@ -85,6 +85,21 @@ def load_panel_configs(dir):
     return od
 
 
+def load_profiling_config(config_dir):
+    """
+    Load profiling_config.yaml from config_dir; returns an empty dict when the file is missing or empty.
+    """
+    try:
+        with open(Path(config_dir).joinpath("profiling_config.yaml")) as file:
+            # safe_load yields None for an empty document, but callers expect a dict
+            return yaml.safe_load(file) or dict()
+    except FileNotFoundError:
+        console_log(
+            f"Could not find profiling_config.yaml in {config_dir} for filtering analysis report"
+        )
+    return dict()
+
+
 @demarcate
 def create_df_kernel_top_stats(
     df_in,
diff --git a/src/utils/parser.py b/src/utils/parser.py
index 74dbc6c300..20870597e3 100644
--- a/src/utils/parser.py
+++ b/src/utils/parser.py
@@ -492,7 +492,9 @@ def build_dfs(archConfigs, filter_metrics, sys_info):
                 if type == "metric_table":
                     headers = ["Metric_ID"]
                     data_source_idx = str(data_config["id"] // 100)
-                    if data_source_idx != 0 or data_source_idx in filter_metrics:
+                    if data_source_idx != 0 or (
+                        filter_metrics and data_source_idx in filter_metrics
+                    ):
                         metric_list[data_source_idx] = panel["title"]
                     if (
                         "cli_style" in data_config
diff --git a/src/utils/tty.py b/src/utils/tty.py
index 2294661f0b..91f37ad9e8 100644
--- a/src/utils/tty.py
+++ b/src/utils/tty.py
@@ -29,7 +29,7 @@ import pandas as pd
 from tabulate import tabulate
 
 from utils import parser
-from utils.utils import console_log, console_warning
+from utils.utils import console_log, console_warning, convert_metric_id_to_panel_idx
 
 hidden_columns = ["Tips", "coll_level"]
 hidden_sections = [1900, 2000]
@@ -60,11 +60,20 @@ def get_table_string(df, transpose=False, decimal=2):
     )
 
 
-def show_all(args, runs, archConfigs, output):
+def show_all(args, runs, archConfigs, output, profiling_config):
     """
     Show all panels with their data in plain text mode.
     """
     comparable_columns = parser.build_comparable_columns(args.time_unit)
+    # Names stored under "filter_blocks" in the profiling config with kind
+    # "metric_id", converted to integer panel/table indices. The list is empty
+    # when the config has no "filter_blocks" entry or no metric id filters.
+    profiled_metric_ids = [
+        name
+        for name, kind in profiling_config.get("filter_blocks", {}).items()
+        if kind == "metric_id"
+    ]
+    filter_panel_ids = [convert_metric_id_to_panel_idx(m) for m in profiled_metric_ids]
 
     for panel_id, panel in archConfigs.panel_configs.items():
         # Skip panels that don't support baseline comparison
@@ -74,6 +83,27 @@ def show_all(args, runs, archConfigs, output):
 
         for data_source in panel["data source"]:
             for type, table_config in data_source.items():
+                # If block filtering was used during analysis, then dont use profiling config
+                # If block filtering was used in profiling config, only show those panels
+                # If block filtering not used in profiling config, show all panels
+                # Skip this table if table id or panel id is not present in block filters
+                # However, always show panel id <= 100
+                if (
+                    not args.filter_metrics
+                    and filter_panel_ids
+                    and table_config["id"] not in filter_panel_ids
+                    and panel_id not in filter_panel_ids
+                    and panel_id > 100
+                ):
+                    table_id_str = (
+                        str(table_config["id"] // 100)
+                        + "."
+                        + str(table_config["id"] % 100)
+                    )
+                    console_log(
+                        f"Not showing table not selected during profiling: {table_id_str} {table_config['title']}"
+                    )
+                    continue
                 # take the 1st run as baseline
                 base_run, base_data = next(iter(runs.items()))
                 base_df = base_data.dfs[table_config["id"]]
@@ -207,7 +237,25 @@ def show_all(args, runs, archConfigs, output):
                         + str(table_config["id"] % 100)
                     )
 
-                    if "title" in table_config and table_config["title"]:
+                    # Check if any column in df is empty
+                    is_empty_columns_exist = any(
+                        [
+                            df.columns[col_idx]
+                            for col_idx in range(len(df.columns))
+                            if df.replace("", None).iloc[:, col_idx].isnull().all()
+                        ]
+                    )
+                    # Do not print the table if any column is empty
+                    if is_empty_columns_exist:
+                        console_log(
+                            f"Not showing table with empty column(s): {table_id_str} {table_config['title']}"
+                        )
+
+                    if (
+                        "title" in table_config
+                        and table_config["title"]
+                        and not is_empty_columns_exist
+                    ):
                         ss += table_id_str + " " + table_config["title"] + "\n"
 
                     if args.df_file_dir:
@@ -238,10 +286,13 @@ def show_all(args, runs, archConfigs, output):
                         and "columnwise" in table_config
                         and table_config["columnwise"] == True
                     )
-                    ss += (
-                        get_table_string(df, transpose=transpose, decimal=args.decimal)
-                        + "\n"
-                    )
+                    if not is_empty_columns_exist:
+                        ss += (
+                            get_table_string(
+                                df, transpose=transpose, decimal=args.decimal
+                            )
+                            + "\n"
+                        )
 
         if ss:
             print("\n" + "-" * 80, file=output)
diff --git a/src/utils/utils.py b/src/utils/utils.py
index aada34f17a..fa8c4a3f4c 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -191,7 +191,7 @@ def capture_subprocess_output(subprocess_args, new_env=None, profileMode=False):
     global rocprof_args
     # Format command for debug messages, formatting for rocprofv1 and rocprofv2
     command = " ".join(rocprof_args)
-    console_debug("subprocess", "Running: " + command)
+    console_debug("subprocess", "Running: " + command + " " + " ".join(subprocess_args))
     # Start subprocess
     # bufsize = 1 means output is line buffered
     # universal_newlines = True is required for line buffering
@@ -820,7 +820,7 @@ def gen_sysinfo(
     df["workload_name"] = workload_name
 
     blocks = []
-    if ip_blocks == None:
+    if not ip_blocks:
         t = ["SQ", "LDS", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"]
         blocks += t
     else:
@@ -1249,3 +1249,16 @@ def merge_counters_spatial_multiplex(df_multi_index):
 
     final_df = pd.concat(result_dfs, keys=coll_levels, axis=1, copy=False)
     return final_df
+
+
+def convert_metric_id_to_panel_idx(metric_id):
+    """Map a dotted metric id string to its integer panel/table index.
+
+    "4" -> 400, "4.02" -> 402, "4.23" -> 423; ids with more than one dot raise.
+    """
+    parts = metric_id.split(".")
+    if len(parts) > 2:
+        raise Exception(f"Invalid metric id: {metric_id}")
+    panel_part = int(parts[0]) * 100
+    table_part = int(parts[1]) if len(parts) == 2 else 0
+    return panel_part + table_part
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8841730cc1..01b3be1cd8 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -13,3 +13,8 @@ set(VCOPY_SOURCES ../sample/vcopy.cpp)
 set_source_files_properties(${VCOPY_SOURCES} PROPERTIES LANGUAGE HIP)
 add_executable(vcopy ${VCOPY_SOURCES})
 set_target_properties(vcopy PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/tests)
+
+set(VMEM_SOURCES ../sample/vmem.hip)
+set_source_files_properties(${VMEM_SOURCES} PROPERTIES LANGUAGE HIP)
+add_executable(vmem ${VMEM_SOURCES})
+set_target_properties(vmem PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/tests)
diff --git a/tests/conftest.py b/tests/conftest.py
index bdcb56866d..bfd9772930 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,11 @@
+import subprocess
+from importlib.machinery import SourceFileLoader
+from unittest.mock import patch
+
 import pytest
 
+rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
+
 
 def pytest_addoption(parser):
     parser.addoption(
@@ -8,3 +14,69 @@ def pytest_addoption(parser):
         default=False,
         help="Call standalone binary instead of main function during tests",
     )
+
+
+@pytest.fixture
+def binary_handler_profile_rocprof_compute(request):
+    """
+    Provide a callable that runs `rocprof-compute profile` for workload "app_1".
+
+    With --call-binary the standalone binary is launched as a subprocess;
+    otherwise main() runs in-process under a patched sys.argv and its
+    SystemExit is captured. The callable returns the exit code.
+
+    Handler arguments:
+        config: dict whose "app_1" entry is the application command list
+        workload_dir: directory passed via --path
+        options: extra CLI options placed before --path
+        check_success: assert a zero exit code when True
+        roof: when False, "--no-roof" is appended
+    """
+
+    def _handler(config, workload_dir, options=[], check_success=True, roof=False):
+        use_binary = request.config.getoption("--call-binary")
+        if use_binary:
+            program = "build/rocprof-compute.bin"
+        else:
+            program = "rocprof-compute"
+        # common prefix, then caller options, then output path and app command
+        argv = [program, "profile", "-n", "app_1", "-VVV"]
+        if not roof:
+            argv.append("--no-roof")
+        argv = argv + options + ["--path", workload_dir, "--"] + config["app_1"]
+
+        if use_binary:
+            exit_code = subprocess.run(argv, text=True).returncode
+        else:
+            with pytest.raises(SystemExit) as exit_info:
+                with patch("sys.argv", argv):
+                    rocprof_compute.main()
+            exit_code = exit_info.value.code
+
+        # verify run status
+        if check_success:
+            assert exit_code == 0
+        return exit_code
+
+    return _handler
+
+
+@pytest.fixture
+def binary_handler_analyze_rocprof_compute(request):
+    """
+    Provide a callable that runs rocprof-compute with the given argument list
+    (via the standalone binary when --call-binary is set, else in-process)
+    and returns the resulting exit code.
+    """
+
+    def _handler(arguments):
+        if not request.config.getoption("--call-binary"):
+            with pytest.raises(SystemExit) as exit_info:
+                with patch("sys.argv", ["rocprof-compute", *arguments]):
+                    rocprof_compute.main()
+            return exit_info.value.code
+        command = ["build/rocprof-compute.bin", *arguments]
+        completed = subprocess.run(command, text=True)
+        return completed.returncode
+
+    return _handler
diff --git a/tests/test_analyze_commands.py b/tests/test_analyze_commands.py
index e3a910ae3b..df1e9603de 100644
--- a/tests/test_analyze_commands.py
+++ b/tests/test_analyze_commands.py
@@ -6,7 +6,6 @@ from unittest.mock import patch
 import pandas as pd
 import pytest
 import test_utils
-from test_utils import binary_handler_analyze_rocprof_compute
 
 config = {}
 config["cleanup"] = True if "PYTEST_XDIST_WORKER_COUNT" in os.environ else False
diff --git a/tests/test_analyze_workloads.py b/tests/test_analyze_workloads.py
index eddd54937f..94d96f8e85 100644
--- a/tests/test_analyze_workloads.py
+++ b/tests/test_analyze_workloads.py
@@ -1,8 +1,6 @@
 from unittest.mock import patch
 
-import pandas as pd
 import pytest
-from test_utils import binary_handler_analyze_rocprof_compute
 
 ##################################################
 ##          Generated tests                     ##
diff --git a/tests/test_profile_general.py b/tests/test_profile_general.py
index 1083666697..3b573e4264 100644
--- a/tests/test_profile_general.py
+++ b/tests/test_profile_general.py
@@ -11,7 +11,6 @@ from unittest.mock import patch
 import pandas as pd
 import pytest
 import test_utils
-from test_utils import binary_handler_profile_rocprof_compute
 
 # Globals
 
@@ -1458,3 +1457,136 @@ def test_mem_levels_LDS(binary_handler_profile_rocprof_compute):
     )
 
     test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.section
+def test_instmix_section(binary_handler_profile_rocprof_compute):
+    """Profile with metric id filter "10" and check the saved config and counter output."""
+    workload_dir = test_utils.get_output_dir()
+    binary_handler_profile_rocprof_compute(
+        config,
+        workload_dir,
+        ["--block", "10"],
+        check_success=True,
+        roof=False,
+    )
+
+    csv_files = test_utils.check_csv_files(workload_dir, 1, num_kernels)
+    validate(inspect.stack()[0][3], workload_dir, csv_files)
+
+    config_yaml = f"{workload_dir}/profiling_config.yaml"
+    counters_csv = f"{workload_dir}/pmc_perf.csv"
+    # the filter is saved with its kind, and the expected counter name is in pmc_perf.csv
+    assert test_utils.check_file_pattern("'10': metric_id", config_yaml)
+    assert test_utils.check_file_pattern("SQ_INSTS_VALU_MFMA_F64", counters_csv)
+
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.section
+def test_instmix_memchart_section(binary_handler_profile_rocprof_compute):
+    """
+    Profile with metric id filters "10" and "3" and check config and counter output.
+    """
+    workload_dir = test_utils.get_output_dir()
+    binary_handler_profile_rocprof_compute(
+        config,
+        workload_dir,
+        ["--block", "10", "3"],
+        check_success=True,
+        roof=False,
+    )
+
+    csv_files = test_utils.check_csv_files(workload_dir, 1, num_kernels)
+    validate(inspect.stack()[0][3], workload_dir, csv_files)
+
+    config_yaml = f"{workload_dir}/profiling_config.yaml"
+    counters_csv = f"{workload_dir}/pmc_perf.csv"
+    # each requested metric id is stored with its filter kind
+    assert test_utils.check_file_pattern("'10': metric_id", config_yaml)
+    assert test_utils.check_file_pattern("'3': metric_id", config_yaml)
+
+    # counter names expected in pmc_perf.csv for this filter combination
+    assert test_utils.check_file_pattern("SQ_INSTS_VALU_MFMA_F64", counters_csv)
+    assert test_utils.check_file_pattern("SQC_TC_DATA_READ_REQ", counters_csv)
+
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.section
+def test_instmix_section_TA_block(binary_handler_profile_rocprof_compute):
+    """Profile with a metric id ("10") combined with a hardware block ("TA") filter."""
+    options = ["--block", "10", "TA"]
+    workload_dir = test_utils.get_output_dir()
+    _ = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=True, roof=False
+    )
+
+    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
+    validate(inspect.stack()[0][3], workload_dir, file_dict)
+
+    # both filter kinds must be recorded in the saved profiling config
+    assert test_utils.check_file_pattern(
+        "'10': metric_id", f"{workload_dir}/profiling_config.yaml"
+    )
+    assert test_utils.check_file_pattern(
+        "TA: hardware_block", f"{workload_dir}/profiling_config.yaml"
+    )
+    # a TA counter must be present while the SQC counter must be absent
+    assert test_utils.check_file_pattern(
+        "TA_FLAT_WAVEFRONTS", f"{workload_dir}/pmc_perf.csv"
+    )
+    assert not test_utils.check_file_pattern(
+        "SQC_TC_DATA_READ_REQ", f"{workload_dir}/pmc_perf.csv"
+    )
+    # an empty regex matches any text, so require real (non-whitespace) content instead
+    assert test_utils.check_file_pattern(r"\S", f"{workload_dir}/pmc_perf.csv")
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.section
+def test_instmix_section_global_write_kernel(binary_handler_profile_rocprof_compute):
+    """
+    Combine a kernel filter (-k global_write) with metric id filter "10" on the
+    ./tests/vmem app and check that global_write, not global_read, is reported.
+    """
+    # copy of the shared config, pointed at the vmem app and its kernel name
+    custom_config = {
+        **config,
+        "kernel_name_1": "global_write",
+        "app_1": ["./tests/vmem"],
+    }
+
+    workload_dir = test_utils.get_output_dir()
+    binary_handler_profile_rocprof_compute(
+        custom_config,
+        workload_dir,
+        ["-k", "global_write", "--block", "10"],
+        check_success=True,
+        roof=False,
+    )
+
+    # this test checks the csv files against a kernel count of 1
+    csv_files = test_utils.check_csv_files(workload_dir, 1, 1)
+    validate(inspect.stack()[0][3], workload_dir, csv_files)
+
+    config_yaml = f"{workload_dir}/profiling_config.yaml"
+    counters_csv = f"{workload_dir}/pmc_perf.csv"
+    assert test_utils.check_file_pattern("'10': metric_id", config_yaml)
+    assert test_utils.check_file_pattern("- global_write", config_yaml)
+    assert test_utils.check_file_pattern("SQ_INSTS_VALU_MFMA_F64", counters_csv)
+    assert test_utils.check_file_pattern("global_write", counters_csv)
+    assert not test_utils.check_file_pattern("global_read", counters_csv)
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.section
+def test_list_metrics(binary_handler_profile_rocprof_compute):
+    """Running profile mode with --list-metrics must leave the workload dir empty."""
+    workload_dir = test_utils.get_output_dir()
+    binary_handler_profile_rocprof_compute(
+        config, workload_dir, ["--list-metrics"], check_success=True, roof=False
+    )
+    leftover_entries = os.listdir(workload_dir)
+    assert len(leftover_entries) == 0
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index e1a3c2c7e8..0a9bd05149 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -25,16 +25,11 @@
 
 import inspect
 import os
+import re
 import shutil
-import subprocess
-from importlib.machinery import SourceFileLoader
 from pathlib import Path
-from unittest.mock import patch
 
 import pandas as pd
-import pytest
-
-rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
 
 
 def check_resource_allocation():
@@ -57,6 +52,14 @@ def check_resource_allocation():
     return
 
 
+def check_file_pattern(pattern, file_path):
+    """Return True when the regex `pattern` matches anywhere in the file's text"""
+    with open(file_path) as handle:
+        text = handle.read()
+    found = re.search(pattern, text)
+    return found is not None
+
+
 def get_output_dir(suffix="_output", clean_existing=True):
     """Provides a unique output directory based on the name of the calling test function with a suffix applied.
 
@@ -130,69 +133,3 @@ def check_csv_files(output_dir, num_devices, num_kernels):
         elif file.endswith(".pdf"):
             file_dict[file] = "pdf"
     return file_dict
-
-
-@pytest.fixture
-def binary_handler_profile_rocprof_compute(request):
-    def _handler(config, workload_dir, options=[], check_success=True, roof=False):
-        if request.config.getoption("--call-binary"):
-            baseline_opts = [
-                "build/rocprof-compute.bin",
-                "profile",
-                "-n",
-                "app_1",
-                "-VVV",
-            ]
-            if not roof:
-                baseline_opts.append("--no-roof")
-            process = subprocess.run(
-                baseline_opts
-                + options
-                + ["--path", workload_dir, "--"]
-                + config["app_1"],
-                text=True,
-            )
-            # verify run status
-            if check_success:
-                assert process.returncode == 0
-            return process.returncode
-        else:
-            baseline_opts = ["rocprof-compute", "profile", "-n", "app_1", "-VVV"]
-            if not roof:
-                baseline_opts.append("--no-roof")
-            with pytest.raises(SystemExit) as e:
-                with patch(
-                    "sys.argv",
-                    baseline_opts
-                    + options
-                    + ["--path", workload_dir, "--"]
-                    + config["app_1"],
-                ):
-                    rocprof_compute.main()
-            # verify run status
-            if check_success:
-                assert e.value.code == 0
-            return e.value.code
-
-    return _handler
-
-
-@pytest.fixture
-def binary_handler_analyze_rocprof_compute(request):
-    def _handler(arguments):
-        if request.config.getoption("--call-binary"):
-            process = subprocess.run(
-                ["build/rocprof-compute.bin", *arguments],
-                text=True,
-            )
-            return process.returncode
-        else:
-            with pytest.raises(SystemExit) as e:
-                with patch(
-                    "sys.argv",
-                    ["rocprof-compute", *arguments],
-                ):
-                    rocprof_compute.main()
-            return e.value.code
-
-    return _handler