Remove rocprofv1/v2 in favour of rocprofiler-sdk (#673)

* Set default rocprof interface as rocprofiler-sdk

* Remove rocprofv1 and rocprofv2 interfaces

* Remove deprecation notice for rocprof v1/v2/v3 interfaces
  * Make rocprofiler-sdk the default interface and make rocprofv3 interface opt-in using ROCPROF=rocprofv3

* Add deprecation notice for rocprofv3
This commit is contained in:
vedithal-amd
2025-09-24 10:37:01 -04:00
committed by GitHub
parent 7df02745eb
commit bd7a1de879
16 changed files with 235 additions and 2365 deletions
@@ -127,6 +127,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* `--list-available-metrics` analyze mode option to display the metrics available for analysis.
* `--block` option cannot be used with `--list-metrics` and `--list-available-metrics` options.
* Default rocprof interface changed from rocprofv3 to rocprofiler-sdk
* Use ROCPROF=rocprofv3 to use rocprofv3 interface
### Removed
* Usage of `rocm-smi` in favor of `amd-smi`.
@@ -113,11 +113,7 @@ class RocProfCompute:
def detect_profiler(self) -> None:
profiler_mode = detect_rocprof(self.__args)
if str(profiler_mode).endswith("rocprof"):
self.__profiler_mode = "rocprofv1"
elif str(profiler_mode).endswith("rocprofv2"):
self.__profiler_mode = "rocprofv2"
elif str(profiler_mode).endswith("rocprofv3"):
if str(profiler_mode).endswith("rocprofv3"):
self.__profiler_mode = "rocprofv3"
elif str(profiler_mode) == "rocprofiler-sdk":
self.__profiler_mode = "rocprofiler-sdk"
@@ -303,16 +299,32 @@ class RocProfCompute:
sys.exit(0)
profiler_classes = {
"rocprofv3": (
"rocprof_compute_profile.profiler_rocprof_v3",
"rocprof_v3_profiler",
),
"rocprofiler-sdk": (
"rocprof_compute_profile.profiler_rocprofiler_sdk",
"rocprofiler_sdk_profiler",
),
}
if self.__profiler_mode not in profiler_classes:
console_error("Unsupported profiler")
module_name, class_name = profiler_classes[self.__profiler_mode]
module = importlib.import_module(module_name)
profiler_class = getattr(module, class_name)
return profiler_class(
self.__args,
self.__profiler_mode,
self.__soc[self.__mspec.gpu_arch],
)
def create_profiler(self) -> object:
profiler_classes = {
"rocprofv1": (
"rocprof_compute_profile.profiler_rocprof_v1",
"rocprof_v1_profiler",
),
"rocprofv2": (
"rocprof_compute_profile.profiler_rocprof_v2",
"rocprof_v2_profiler",
),
"rocprofv3": (
"rocprof_compute_profile.profiler_rocprof_v3",
"rocprof_v3_profiler",
@@ -466,16 +466,9 @@ class RocProfCompute_Base:
console_debug(output)
console_log("profiling", f"Current input file: {fname}")
if self.__profiler in (
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
):
options = self.get_profiler_options(str(fname), self._soc)
start_time = time.time()
options = self.get_profiler_options(fname, self._soc)
start_time = time.time()
if self.__profiler == "rocprofv3" or self.__profiler == "rocprofiler-sdk":
# Only 1-run case is permitted for attach/detach
if (isinstance(options, list) and "--pid" in options) or (
isinstance(options, dict)
@@ -490,7 +483,6 @@ class RocProfCompute_Base:
f'passes. Please use "--block" or "--set" '
f"to adjust or reduce the requested performance metrics!"
)
run_prof(
fname=str(fname),
profiler_options=options,
@@ -1,109 +0,0 @@
##############################################################################
# MIT License
#
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
##############################################################################
from pathlib import Path
import config
from rocprof_compute_profile.profiler_base import RocProfCompute_Base
from utils.logger import console_log, demarcate
from utils.utils import replace_timestamps, store_app_cmd
class rocprof_v1_profiler(RocProfCompute_Base):
    """Profiler front end that assembles rocprof v1 command-line options.

    ``ready_to_profile`` is False only when a roofline-only run finds an
    existing ``pmc_perf.csv`` under the requested output path; in every
    other case profiling goes ahead.
    """

    def __init__(self, profiling_args, profiler_mode, soc):
        super().__init__(profiling_args, profiler_mode, soc)
        cli_args = self.get_args()
        if cli_args.roof_only:
            # A roofline-only run reuses pmc_perf.csv when it already exists.
            existing_csv = Path(cli_args.path).joinpath("pmc_perf.csv")
            self.ready_to_profile = not existing_csv.is_file()
        else:
            self.ready_to_profile = True

    def get_profiler_options(self, fname, soc):
        """Return the rocprof v1 argument list for the input file ``fname``.

        The unquoted form of the same arguments is handed to
        ``store_app_cmd`` for the debug message.
        """
        input_stem = Path(fname).stem
        workload_cmd = self.get_args().remaining
        options = []

        # rocprof v1 does not support some counters on gfx 908 architecture
        if soc.get_arch() == "gfx908":
            metrics_xml = Path(str(config.rocprof_compute_home)).joinpath(
                "rocprof_compute_soc", "profile_configs", "metrics.xml"
            )
            options.extend(["-m", str(metrics_xml)])

        # v1 requires a csv extension on the output file
        output_csv = self.get_args().path + "/" + input_stem + ".csv"
        # v1 requires an explicit request for timestamps
        common_flags = ["--timestamp", "on", "-o", output_csv]

        # v1 does require quotes on app cmd
        options.extend(common_flags + ['"' + workload_cmd + '"'])

        # store original args for debug message
        store_app_cmd(common_flags + [workload_cmd])

        return options

    # -----------------------
    # Required child methods
    # -----------------------
    @demarcate
    def pre_processing(self):
        """Run the base-class pre-processing steps before profiling."""
        super().pre_processing()

    @demarcate
    def run_profiling(self, version: str, prog: str):
        """Run profiling unless an existing pmc_perf.csv is being reused."""
        if not self.ready_to_profile:
            console_log("roofline", "Detected existing pmc_perf.csv")
            return

        if self.get_args().roof_only:
            console_log(
                "roofline", "Generating pmc_perf.csv (roofline counters only)."
            )
        # Log profiling options and setup filtering
        super().run_profiling(version, prog)

    @demarcate
    def post_processing(self):
        """Perform any post-processing steps once profiling has run."""
        if not self.ready_to_profile:
            console_log("roofline", "Detected existing pmc_perf.csv")
            return

        # Manually join each pmc_perf*.csv output
        self.join_prof()
        # Run roofline microbenchmark
        super().post_processing()
        # Replace timestamp data to solve a known rocprof bug
        replace_timestamps(self.get_args().path)
@@ -1,99 +0,0 @@
##############################################################################
# MIT License
#
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
##############################################################################
import shlex
from pathlib import Path
import config
from rocprof_compute_profile.profiler_base import RocProfCompute_Base
from utils.logger import console_log, demarcate
from utils.utils import replace_timestamps, store_app_cmd
class rocprof_v2_profiler(RocProfCompute_Base):
    """Profiler front end that assembles rocprof v2 command-line options.

    ``ready_to_profile`` is False only when a roofline-only run finds an
    existing ``pmc_perf.csv`` under the requested output path; in every
    other case profiling goes ahead.
    """

    def __init__(self, profiling_args, profiler_mode, soc):
        super().__init__(profiling_args, profiler_mode, soc)
        cli_args = self.get_args()
        if cli_args.roof_only:
            # A roofline-only run reuses pmc_perf.csv when it already exists.
            existing_csv = Path(cli_args.path).joinpath("pmc_perf.csv")
            self.ready_to_profile = not existing_csv.is_file()
        else:
            self.ready_to_profile = True

    def get_profiler_options(self, fname, soc):
        """Return the rocprof v2 argument list.

        ``fname`` is accepted for interface parity but is not used here.
        The same list is handed to ``store_app_cmd`` for the debug message.
        """
        workload_tokens = shlex.split(self.get_args().remaining)
        options = []

        # rocprof v2 does not support some counters on gfx 908 architecture
        if soc.get_arch() == "gfx908":
            metrics_xml = Path(str(config.rocprof_compute_home)).joinpath(
                "rocprof_compute_soc", "profile_configs", "metrics.xml"
            )
            options.extend(["-m", str(metrics_xml)])

        # v2 requires output directory argument
        options.extend(["-d", self.get_args().path + "/" + "out"])
        options.extend(workload_tokens)

        # store args for debug message
        store_app_cmd(options)
        return options

    # -----------------------
    # Required child methods
    # -----------------------
    @demarcate
    def pre_processing(self):
        """Run the base-class pre-processing steps before profiling."""
        super().pre_processing()

    @demarcate
    def run_profiling(self, version, prog):
        """Run profiling unless an existing pmc_perf.csv is being reused."""
        if not self.ready_to_profile:
            console_log("roofline", "Detected existing pmc_perf.csv")
            return

        if self.get_args().roof_only:
            console_log(
                "roofline", "Generating pmc_perf.csv (roofline counters only)."
            )
        # Log profiling options and setup filtering
        super().run_profiling(version, prog)

    @demarcate
    def post_processing(self):
        """Perform any post-processing steps once profiling has run."""
        if not self.ready_to_profile:
            console_log("roofline", "Detected existing pmc_perf.csv")
            return

        # Manually join each pmc_perf*.csv output
        self.join_prof()
        # Run roofline microbenchmark
        super().post_processing()
        # Replace timestamp data to solve a known rocprof bug
        replace_timestamps(self.get_args().path)
@@ -1,737 +0,0 @@
<gfx908>
# CPC counters
<metric
name="CPC_ME1_BUSY_FOR_PACKET_DECODE" block=CPC event=13 descr="Me1 busy for packet decode."
></metric>
<metric
name="CPC_UTCL1_STALL_ON_TRANSLATION" block=CPC event=24 descr="One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."
></metric>
<metric
name="CPC_CPC_STAT_BUSY" block=CPC event=25 descr="CPC Busy."
></metric>
<metric
name="CPC_CPC_STAT_IDLE" block=CPC event=26 descr="CPC Idle."
></metric>
<metric
name="CPC_CPC_STAT_STALL" block=CPC event=27 descr="CPC Stalled."
></metric>
<metric
name="CPC_CPC_TCIU_BUSY" block=CPC event=28 descr="CPC TCIU interface Busy."
></metric>
<metric
name="CPC_CPC_TCIU_IDLE" block=CPC event=29 descr="CPC TCIU interface Idle."
></metric>
<metric
name="CPC_CPC_UTCL2IU_BUSY" block=CPC event=30 descr="CPC UTCL2 interface Busy."
></metric>
<metric
name="CPC_CPC_UTCL2IU_IDLE" block=CPC event=31 descr="CPC UTCL2 interface Idle."
></metric>
<metric
name="CPC_CPC_UTCL2IU_STALL" block=CPC event=32 descr="CPC UTCL2 interface Stalled waiting on Free, Tags or Translation."
></metric>
<metric
name="CPC_ME1_DC0_SPI_BUSY" block=CPC event=33 descr="CPC Me1 Processor Busy."
></metric>
<metric
name="CPF_CMP_UTCL1_STALL_ON_TRANSLATION" block=CPF event=20 descr="One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING response."
></metric>
<metric
name="CPF_CPF_STAT_BUSY" block=CPF event=23 descr="CPF Busy."
></metric>
<metric
name="CPF_CPF_STAT_IDLE" block=CPF event=24 descr="CPF Idle."
></metric>
<metric
name="CPF_CPF_STAT_STALL" block=CPF event=25 descr="CPF Stalled."
></metric>
<metric
name="CPF_CPF_TCIU_BUSY" block=CPF event=26 descr="CPF TCIU interface Busy."
></metric>
<metric
name="CPF_CPF_TCIU_IDLE" block=CPF event=27 descr="CPF TCIU interface Idle."
></metric>
<metric
name="CPF_CPF_TCIU_STALL" block=CPF event=28 descr="CPF TCIU interface Stalled waiting on Free, Tags."
></metric>
# GRBM counters
<metric
name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"
></metric>
<metric
name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"
></metric>
<metric
name="GRBM_CP_BUSY" block=GRBM event=3 descr="Any of the Command Processor (CPG/CPC/CPF) blocks are busy."
></metric>
<metric
name="GRBM_SPI_BUSY" block=GRBM event=11 descr="Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."
></metric>
<metric
name="GRBM_TA_BUSY" block=GRBM event=13 descr="Any of the Texture Pipes (TA) are busy in the shader engine(s)."
></metric>
<metric
name="GRBM_TC_BUSY" block=GRBM event=28 descr="Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."
></metric>
<metric
name="GRBM_CPC_BUSY" block=GRBM event=30 descr="The Command Processor Compute (CPC) is busy."
></metric>
<metric
name="GRBM_CPF_BUSY" block=GRBM event=31 descr="The Command Processor Fetchers (CPF) is busy."
></metric>
<metric
name="GRBM_UTCL2_BUSY" block=GRBM event=34 descr="The Unified Translation Cache Level-2 (UTCL2) block is busy."
></metric>
<metric
name="GRBM_EA_BUSY" block=GRBM event=35 descr="The Efficiency Arbiter (EA) block is busy."
></metric>
# SPI counters
<metric
name="SPI_CSN_WINDOW_VALID" block=SPI event=47 descr="Clock count enabled by perfcounter_start event. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_CSN_BUSY" block=SPI event=48 descr="Number of clocks with outstanding waves (SPI or SH). Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_CSN_NUM_THREADGROUPS" block=SPI event=49 descr="Number of threadgroups launched. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_CSN_WAVE" block=SPI event=52 descr="Number of waves. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_RA_REQ_NO_ALLOC" block=SPI event=79 descr="Arb cycles with requests but no allocation. Source is RA0"
></metric>
<metric
name="SPI_RA_REQ_NO_ALLOC_CSN" block=SPI event=85 descr="Arb cycles with CSn req and no CSn alloc. Source is RA0"
></metric>
<metric
name="SPI_RA_RES_STALL_CSN" block=SPI event=91 descr="Arb cycles with CSn req and no CSn fits. Source is RA0"
></metric>
<metric
name="SPI_RA_TMP_STALL_CSN" block=SPI event=97 descr="Cycles where csn wants to req but does not fit in temp space."
></metric>
<metric
name="SPI_RA_WAVE_SIMD_FULL_CSN" block=SPI event=103 descr="Sum of SIMD where WAVE can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_VGPR_SIMD_FULL_CSN" block=SPI event=109 descr="Sum of SIMD where VGPR can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_SGPR_SIMD_FULL_CSN" block=SPI event=115 descr="Sum of SIMD where SGPR can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_LDS_CU_FULL_CSN" block=SPI event=120 descr="Sum of CU where LDS can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_BAR_CU_FULL_CSN" block=SPI event=123 descr="Sum of CU where BARRIER can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_BULKY_CU_FULL_CSN" block=SPI event=125 descr="Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_TGLIM_CU_FULL_CSN" block=SPI event=127 descr="Cycles where csn wants to req but all CU are at tg_limit"
></metric>
<metric
name="SPI_RA_WVLIM_STALL_CSN" block=SPI event=133 descr="Number of clocks csn is stalled due to WAVE LIMIT."
></metric>
<metric
name="SPI_SWC_CSC_WR" block=SPI event=189 descr="Number of clocks to write CSC waves to SGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_VWC_CSC_WR" block=SPI event=195 descr="Number of clocks to write CSC waves to VGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
# SQ counters
<metric
name="SQ_ACCUM_PREV" block=SQ event=1 descr="For counter N, increment by the value of counter N-1. Only accumulates once every 4 cycles."
></metric>
<metric
name="SQ_CYCLES" block=SQ event=2 descr="Clock cycles. (nondeterministic, per-simd, global)"
></metric>
<metric
name="SQ_BUSY_CYCLES" block=SQ event=3 descr="Clock cycles while SQ is reporting that it is busy. (nondeterministic, per-simd, global)"
></metric>
<metric
name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_LEVEL_WAVES" block=SQ event=5 descr="Track the number of waves. Set ACCUM_PREV for the next counter to use this. (level, per-simd, global)"
></metric>
<metric
name="SQ_WAVES_EQ_64" block=SQ event=6 descr="Count number of waves with exactly 64 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_64" block=SQ event=7 descr="Count number of waves with <64 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_48" block=SQ event=8 descr="Count number of waves with <48 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_32" block=SQ event=9 descr="Count number of waves sent <32 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_16" block=SQ event=10 descr="Count number of waves sent <16 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_BUSY_CU_CYCLES" block=SQ event=13 descr="Count quad-cycles each CU is busy. (nondeterministic, per-simd)"
></metric>
<metric
name="SQ_ITEMS" block=SQ event=14 descr="Number of valid items per wave. (per-simd, global)"
></metric>
<metric
name="SQ_INSTS" block=SQ event=25 descr="Number of instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VALU" block=SQ event=26 descr="Number of VALU instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_MFMA" block=SQ event=27 descr="Number of MFMA instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VMEM_WR" block=SQ event=28 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VMEM_RD" block=SQ event=29 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VMEM" block=SQ event=30 descr="Number of VMEM instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_SALU" block=SQ event=31 descr="Number of SALU instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_SMEM" block=SQ event=32 descr="Number of SMEM instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_FLAT" block=SQ event=33 descr="Number of FLAT instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=34 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_LDS" block=SQ event=35 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_GDS" block=SQ event=36 descr="Number of GDS instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_EXP_GDS" block=SQ event=38 descr="Number of EXP and GDS instructions issued, excluding skipped export instructions. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_BRANCH" block=SQ event=39 descr="Number of Branch instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_SENDMSG" block=SQ event=40 descr="Number of Sendmsg instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VSKIPPED" block=SQ event=41 descr="Number of vector instructions skipped. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_LEVEL_VMEM" block=SQ event=42 descr="Number of in-flight VMEM instructions. Set next counter to ACCUM_PREV and divide by INSTS_VMEM for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"
></metric>
<metric
name="SQ_INST_LEVEL_SMEM" block=SQ event=43 descr="Number of in-flight SMEM instructions (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). Set next counter to ACCUM_PREV and divide by INSTS_SMEM for average latency per smem request. Falls slightly short of total request latency because some fetches are divided into two requests that may finish at different times and this counter collects the average latency of the two. (per-simd, level, nondeterministic)"
></metric>
<metric
name="SQ_INST_LEVEL_LDS" block=SQ event=44 descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"
></metric>
<metric
name="SQ_WAVE_CYCLES" block=SQ event=47 descr="Number of wave-cycles spent by waves in the CUs (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_WAIT_ANY" block=SQ event=58 descr="Number of wave-cycles spent waiting for anything (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_WAIT_INST_ANY" block=SQ event=61 descr="Number of wave-cycles spent waiting for any instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_WAIT_INST_LDS" block=SQ event=64 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_ACTIVE_INST_ANY" block=SQ event=69 descr="Number of cycles each wave is working on an instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_VMEM" block=SQ event=70 descr="Number of cycles the SQ instruction arbiter is working on a VMEM instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_LDS" block=SQ event=71 descr="Number of cycles the SQ instruction arbiter is working on a LDS instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_VALU" block=SQ event=72 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_SCA" block=SQ event=73 descr="Number of cycles the SQ instruction arbiter is working on a SALU or SMEM instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_EXP_GDS" block=SQ event=74 descr="Number of cycles the SQ instruction arbiter is working on an EXPORT or GDS instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_MISC" block=SQ event=75 descr="Number of cycles the SQ instruction aribter is working on a BRANCH or SENDMSG instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_FLAT" block=SQ event=76 descr="Number of cycles the SQ instruction arbiter is working on a FLAT instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_VMEM_WR" block=SQ event=77 descr="Number of cycles needed to send addr and cmd data for VMEM write instructions. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_VMEM_RD" block=SQ event=78 descr="Number of cycles needed to send addr and cmd data for VMEM read instructions. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_SMEM" block=SQ event=84 descr="Number of cycles needed to execute scalar memory reads. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_SALU" block=SQ event=85 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"
></metric>
<metric
name="SQ_THREAD_CYCLES_VALU" block=SQ event=86 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"
></metric>
<metric
name="SQ_IFETCH" block=SQ event=88 descr="Number of instruction fetch requests from cache. (per-simd, emulated)"
></metric>
<metric
name="SQ_IFETCH_LEVEL" block=SQ event=89 descr="Number of instruction fetch requests from cache. (per-simd, level)"
></metric>
<metric
name="SQ_LDS_BANK_CONFLICT" block=SQ event=94 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"
></metric>
<metric
name="SQ_LDS_ADDR_CONFLICT" block=SQ event=95 descr="Number of cycles LDS is stalled by address conflicts. (emulated,nondeterministic)"
></metric>
<metric
name="SQ_LDS_UNALIGNED_STALL" block=SQ event=96 descr="Number of cycles LDS is stalled processing flat unaligned load/store ops. (emulated)"
></metric>
<metric
name="SQ_LDS_MEM_VIOLATIONS" block=SQ event=97 descr="Number of threads that have a memory violation in the LDS.(emulated)"
></metric>
<metric
name="SQ_LDS_ATOMIC_RETURN" block=SQ event=98 descr="Number of atomic return cycles in LDS. (per-simd, emulated)"
></metric>
<metric
name="SQ_LDS_IDX_ACTIVE" block=SQ event=99 descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACCUM_PREV_HIRES" block=SQ event=158 descr="For counter N, increment by the value of counter N-1."
></metric>
<metric
name="SQ_WAVES_RESTORED" block=SQ event=159 descr="Count number of context-restored waves sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_SAVED" block=SQ event=160 descr="Count number of context-saved waves. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_INSTS_SMEM_NORM" block=SQ event=161 descr="Number of SMEM instructions issued normalized to match smem_level (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). (per-simd, emulated)"
></metric>
<metric
name="SQC_DCACHE_INPUT_VALID_READYB" block=SQ event=260 descr="Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"
></metric>
<metric
name="SQC_TC_REQ" block=SQ event=262 descr="Total number of TC requests that were issued by instruction and constant caches. (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_INST_REQ" block=SQ event=263 descr="Number of insruction requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_DATA_READ_REQ" block=SQ event=264 descr="Number of data read requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_DATA_WRITE_REQ" block=SQ event=265 descr="Number of data write requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_DATA_ATOMIC_REQ" block=SQ event=266 descr="Number of data atomic requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_STALL" block=SQ event=267 descr="Valid request stalled TC request interface (no-credits). (No-Masking, nondeterministic, unwindowed)"
></metric>
<metric
name="SQC_ICACHE_REQ" block=SQ event=270 descr="Number of requests. (per-SQ, per-Bank)"
></metric>
<metric
name="SQC_ICACHE_HITS" block=SQ event=271 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_ICACHE_MISSES" block=SQ event=272 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_ICACHE_MISSES_DUPLICATE" block=SQ event=273 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_REQ" block=SQ event=290 descr="Number of requests (post-bank-serialization). (per-SQ, per-Bank)"
></metric>
<metric
name="SQC_DCACHE_HITS" block=SQ event=291 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_MISSES" block=SQ event=292 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_MISSES_DUPLICATE" block=SQ event=293 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_ATOMIC" block=SQ event=298 descr="Number of atomic requests. (per-SQ, per-Bank)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_1" block=SQ event=323 descr="Number of constant cache 1 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_2" block=SQ event=324 descr="Number of constant cache 2 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_4" block=SQ event=325 descr="Number of constant cache 4 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_8" block=SQ event=326 descr="Number of constant cache 8 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_16" block=SQ event=327 descr="Number of constant cache 16 dw read requests. (per-SQ)"
></metric>
# TA counters
<metric
name="TA_TA_BUSY" block=TA event=15 descr="TA block is busy. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_TOTAL_WAVEFRONTS" block=TA event=32 descr="Total number of wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_WAVEFRONTS" block=TA event=44 descr="Number of buffer wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_READ_WAVEFRONTS" block=TA event=45 descr="Number of buffer read wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_WRITE_WAVEFRONTS" block=TA event=46 descr="Number of buffer write wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_ATOMIC_WAVEFRONTS" block=TA event=47 descr="Number of buffer atomic wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_TOTAL_CYCLES" block=TA event=49 descr="Number of buffer cycles issued to TC."
></metric>
<metric
name="TA_BUFFER_COALESCED_READ_CYCLES" block=TA event=52 descr="Number of buffer coalesced read cycles issued to TC."
></metric>
<metric
name="TA_BUFFER_COALESCED_WRITE_CYCLES" block=TA event=53 descr="Number of buffer coalesced write cycles issued to TC."
></metric>
<metric
name="TA_ADDR_STALLED_BY_TC_CYCLES" block=TA event=54 descr="Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_ADDR_STALLED_BY_TD_CYCLES" block=TA event=55 descr="Number of cycles addr path stalled by TD. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_DATA_STALLED_BY_TC_CYCLES" block=TA event=56 descr="Number of cycles data path stalled by TC. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_FLAT_WAVEFRONTS" block=TA event=100 descr="Number of flat opcode wavfronts processed by the TA."
></metric>
<metric
name="TA_FLAT_READ_WAVEFRONTS" block=TA event=101 descr="Number of flat opcode reads processed by the TA."
></metric>
<metric
name="TA_FLAT_WRITE_WAVEFRONTS" block=TA event=102 descr="Number of flat opcode writes processed by the TA."
></metric>
<metric
name="TA_FLAT_ATOMIC_WAVEFRONTS" block=TA event=103 descr="Number of flat opcode atomics processed by the TA."
></metric>
# TCA counters
<metric
name="TCA_CYCLE" block=TCA event=1 descr="Number of cycles. Not windowable."
></metric>
<metric
name="TCA_BUSY" block=TCA event=2 descr="Number of cycles we have a request pending. Not windowable."
></metric>
# TCC counters
<metric
name="TCC_CYCLE" block=TCC event=1 descr="Number of cycles. Not windowable."
></metric>
<metric
name="TCC_BUSY" block=TCC event=2 descr="Number of cycles we have a request pending. Not windowable."
></metric>
<metric
name="TCC_REQ" block=TCC event=3 descr="Number of requests of all types. This is measured at the tag block. This may be more than the number of requests arriving at the TCC, but it is a good indication of the total amount of work that needs to be performed."
></metric>
<metric
name="TCC_STREAMING_REQ" block=TCC event=4 descr="Number of streaming requests. This is measured at the tag block."
></metric>
<metric
name="TCC_NC_REQ" block=TCC event=5 descr="The number of noncoherently cached requests. This is measured at the tag block."
></metric>
<metric
name="TCC_UC_REQ" block=TCC event=6 descr="The number of uncached requests. This is measured at the tag block."
></metric>
<metric
name="TCC_CC_REQ" block=TCC event=7 descr="The number of coherently cached requests. This is measured at the tag block."
></metric>
<metric
name="TCC_RW_REQ" block=TCC event=8 descr="The number of RW requests. This is measured at the tag block."
></metric>
<metric
name="TCC_PROBE" block=TCC event=9 descr="Number of probe requests. Not windowable."
></metric>
<metric
name="TCC_PROBE_ALL" block=TCC event=10 descr="Number of external probe requests with EA_TCC_preq_all == 1. Not windowable."
></metric>
<metric
name="TCC_READ" block=TCC event=12 descr="Number of read requests. Compressed reads are included in this, but metadata reads are not included."
></metric>
<metric
name="TCC_WRITE" block=TCC event=13 descr="Number of write requests."
></metric>
<metric
name="TCC_ATOMIC" block=TCC event=14 descr="Number of atomic requests of all types."
></metric>
<metric
name="TCC_HIT" block=TCC event=17 descr="Number of cache hits."
></metric>
<metric
name="TCC_MISS" block=TCC event=19 descr="Number of cache misses. UC reads count as misses."
></metric>
<metric
name="TCC_WRITEBACK" block=TCC event=22 descr="Number of lines written back to main memory. This includes writebacks of dirty lines and uncached write/atomic requests."
></metric>
<metric
name="TCC_EA_WRREQ" block=TCC event=26 descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."
></metric>
<metric
name="TCC_EA_WRREQ_64B" block=TCC event=27 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."
></metric>
<metric
name="TCC_EA_WR_UNCACHED_32B" block=TCC event=29 descr="Number of 32-byte write/atomic going over the TC_EA_wrreq interface due to uncached traffic. Note that CC mtypes can produce uncached requests, and those are included in this. A 64-byte request will be counted as 2"
></metric>
<metric
name="TCC_EA_WRREQ_STALL" block=TCC event=30 descr="Number of cycles a write request was stalled."
></metric>
<metric
name="TCC_EA_WRREQ_IO_CREDIT_STALL" block=TCC event=31 descr="Number of cycles a EA write request was stalled because the interface was out of IO credits."
></metric>
<metric
name="TCC_EA_WRREQ_GMI_CREDIT_STALL" block=TCC event=32 descr="Number of cycles a EA write request was stalled because the interface was out of GMI credits."
></metric>
<metric
name="TCC_EA_WRREQ_DRAM_CREDIT_STALL" block=TCC event=33 descr="Number of cycles a EA write request was stalled because the interface was out of DRAM credits."
></metric>
<metric
name="TCC_TOO_MANY_EA_WRREQS_STALL" block=TCC event=34 descr="Number of cycles the TCC could not send a EA write request because it already reached its maximum number of pending EA write requests."
></metric>
<metric
name="TCC_EA_WRREQ_LEVEL" block=TCC event=35 descr="The sum of the number of EA write requests in flight. This is primarily meant for measuring average EA write latency. Average write latency = TCC_PERF_SEL_EA_WRREQ_LEVEL/TCC_PERF_SEL_EA_WRREQ."
></metric>
<metric
name="TCC_EA_ATOMIC" block=TCC event=36 descr="Number of transactions going over the TC_EA_wrreq interface that are actually atomic requests."
></metric>
<metric
name="TCC_EA_ATOMIC_LEVEL" block=TCC event=37 descr="The sum of the number of EA atomics in flight. This is primarily meant for measuring average EA atomic latency. Average atomic latency = TCC_PERF_SEL_EA_WRREQ_ATOMIC_LEVEL/TCC_PERF_SEL_EA_WRREQ_ATOMIC."
></metric>
<metric
name="TCC_EA_RDREQ" block=TCC event=38 descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"
></metric>
<metric
name="TCC_EA_RDREQ_32B" block=TCC event=39 descr="Number of 32-byte TCC/EA read requests"
></metric>
<metric
name="TCC_EA_RD_UNCACHED_32B" block=TCC event=40 descr="Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be counted as 2"
></metric>
<metric
name="TCC_EA_RDREQ_IO_CREDIT_STALL" block=TCC event=41 descr="Number of cycles there was a stall because the read request interface was out of IO credits. Stalls occur regardless of whether a read needed to be performed or not."
></metric>
<metric
name="TCC_EA_RDREQ_GMI_CREDIT_STALL" block=TCC event=42 descr="Number of cycles there was a stall because the read request interface was out of GMI credits. Stalls occur regardless of whether a read needed to be performed or not."
></metric>
<metric
name="TCC_EA_RDREQ_DRAM_CREDIT_STALL" block=TCC event=43 descr="Number of cycles there was a stall because the read request interface was out of DRAM credits. Stalls occur regardless of whether a read needed to be performed or not."
></metric>
<metric
name="TCC_EA_RDREQ_LEVEL" block=TCC event=44 descr="The sum of the number of TCC/EA read requests in flight. This is primarily meant for measuring average EA read latency. Average read latency = TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."
></metric>
<metric
name="TCC_TAG_STALL" block=TCC event=45 descr="Number of cycles the normal request pipeline in the tag was stalled for any reason. Normally, stalls of this nature are measured exactly from one point in the pipeline, but that is not the case for this counter. Probes can stall the pipeline at a variety of places, and there is no single point that can reasonably measure the total stalls accurately."
></metric>
<metric
name="TCC_NORMAL_WRITEBACK" block=TCC event=68 descr="Number of writebacks due to requests that are not writeback requests."
></metric>
<metric
name="TCC_ALL_TC_OP_WB_WRITEBACK" block=TCC event=73 descr="Number of writebacks due to all TC_OP writeback requests."
></metric>
<metric
name="TCC_NORMAL_EVICT" block=TCC event=74 descr="Number of evictions due to requests that are not invalidate or probe requests."
></metric>
<metric
name="TCC_ALL_TC_OP_INV_EVICT" block=TCC event=80 descr="Number of evictions due to all TC_OP invalidate requests."
></metric>
<metric
name="TCC_EA_RDREQ_DRAM" block=TCC event=102 descr="Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC)."
></metric>
<metric
name="TCC_EA_WRREQ_DRAM" block=TCC event=103 descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC)."
></metric>
<metric
name="TCC_CLIENT184_REQ" block=TCC event=312 descr=""
></metric>
<metric
name="TCC_CLIENT185_REQ" block=TCC event=313 descr=""
></metric>
<metric
name="TCC_CLIENT186_REQ" block=TCC event=314 descr=""
></metric>
<metric
name="TCC_CLIENT187_REQ" block=TCC event=315 descr=""
></metric>
<metric
name="TCC_CLIENT188_REQ" block=TCC event=316 descr=""
></metric>
<metric
name="TCC_CLIENT189_REQ" block=TCC event=317 descr=""
></metric>
<metric
name="TCC_CLIENT190_REQ" block=TCC event=318 descr=""
></metric>
<metric
name="TCC_CLIENT191_REQ" block=TCC event=319 descr=""
></metric>
# TCP counters
<metric
name="TCP_GATE_EN1" block=TCP event=0 descr="TCP interface clocks are turned on. Not Windowed."
></metric>
<metric
name="TCP_GATE_EN2" block=TCP event=1 descr="TCP core clocks are turned on. Not Windowed."
></metric>
<metric
name="TCP_TCP_TA_DATA_STALL_CYCLES" block=TCP event=6 descr="TCP stalls TA data interface. Not Windowed."
></metric>
<metric
name="TCP_TD_TCP_STALL_CYCLES" block=TCP event=7 descr="TD stalls TCP"
></metric>
<metric
name="TCP_TCR_TCP_STALL_CYCLES" block=TCP event=8 descr="TCR stalls TCP_TCR_req interface"
></metric>
<metric
name="TCP_READ_TAGCONFLICT_STALL_CYCLES" block=TCP event=11 descr="Tagram conflict stall on a read"
></metric>
<metric
name="TCP_WRITE_TAGCONFLICT_STALL_CYCLES" block=TCP event=12 descr="Tagram conflict stall on a write"
></metric>
<metric
name="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES" block=TCP event=13 descr="Tagram conflict stall on an atomic"
></metric>
<metric
name="TCP_PENDING_STALL_CYCLES" block=TCP event=22 descr="Stall due to data pending from L2"
></metric>
<metric
name="TCP_TA_TCP_STATE_READ" block=TCP event=27 descr="Number of state reads"
></metric>
<metric
name="TCP_VOLATILE" block=TCP event=28 descr="Total number of L1 volatile pixels/buffers from TA"
></metric>
<metric
name="TCP_TOTAL_ACCESSES" block=TCP event=29 descr="Total number of pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_READ+TCP_PERF_SEL_TOTAL_NONREAD"
></metric>
<metric
name="TCP_TOTAL_READ" block=TCP event=30 descr="Total number of read pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_HIT_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_EVICT_READ"
></metric>
<metric
name="TCP_TOTAL_WRITE" block=TCP event=32 descr="Total number of local write pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE+ TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE"
></metric>
<metric
name="TCP_TOTAL_ATOMIC_WITH_RET" block=TCP event=38 descr="Total number of atomic with return pixels/buffers from TA"
></metric>
<metric
name="TCP_TOTAL_ATOMIC_WITHOUT_RET" block=TCP event=39 descr="Total number of atomic without return pixels/buffers from TA"
></metric>
<metric
name="TCP_TOTAL_WRITEBACK_INVALIDATES" block=TCP event=45 descr="Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed."
></metric>
<metric
name="TCP_UTCL1_REQUEST" block=TCP event=47 descr="Total CLIENT_UTCL1 NORMAL requests"
></metric>
<metric
name="TCP_UTCL1_TRANSLATION_MISS" block=TCP event=48 descr="Total utcl1 translation misses"
></metric>
<metric
name="TCP_UTCL1_TRANSLATION_HIT" block=TCP event=49 descr="Total utcl1 translation hits"
></metric>
<metric
name="TCP_UTCL1_PERMISSION_MISS" block=TCP event=50 descr="Total utcl1 permission misses"
></metric>
<metric
name="TCP_TOTAL_CACHE_ACCESSES" block=TCP event=60 descr="Count of total cache line (tag) accesses (includes hits and misses)."
></metric>
<metric
name="TCP_TCP_LATENCY" block=TCP event=65 descr="Total TCP wave latency (from first clock of wave entering to first clock of wave leaving), divide by TA_TCP_STATE_READ to avg wave latency"
></metric>
<metric
name="TCP_TCC_READ_REQ_LATENCY" block=TCP event=66 descr="Total TCP->TCC request latency for reads and atomics with return. Not Windowed."
></metric>
<metric
name="TCP_TCC_WRITE_REQ_LATENCY" block=TCP event=67 descr="Total TCP->TCC request latency for writes and atomics without return. Not Windowed."
></metric>
<metric
name="TCP_TCC_READ_REQ" block=TCP event=69 descr="Total read requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_WRITE_REQ" block=TCP event=70 descr="Total write requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_ATOMIC_WITH_RET_REQ" block=TCP event=71 descr="Total atomic with return requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_ATOMIC_WITHOUT_RET_REQ" block=TCP event=72 descr="Total atomic without return requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_NC_READ_REQ" block=TCP event=75 descr="Total read requests with NC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_NC_WRITE_REQ" block=TCP event=76 descr="Total write requests with NC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_NC_ATOMIC_REQ" block=TCP event=77 descr="Total atomic requests with NC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_UC_READ_REQ" block=TCP event=78 descr="Total read requests with UC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_UC_WRITE_REQ" block=TCP event=79 descr="Total write requests with UC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_UC_ATOMIC_REQ" block=TCP event=80 descr="Total atomic requests with UC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_CC_READ_REQ" block=TCP event=81 descr="Total read requests with CC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_CC_WRITE_REQ" block=TCP event=82 descr="Total write requests with CC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_CC_ATOMIC_REQ" block=TCP event=83 descr="Total atomic requests with CC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_RW_READ_REQ" block=TCP event=85 descr="Total read requests with RW mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_RW_WRITE_REQ" block=TCP event=86 descr="Total write requests with RW mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_RW_ATOMIC_REQ" block=TCP event=87 descr="Total atomic requests with RW mtype from this TCP to all TCCs"
></metric>
# TD counters
<metric
name="TD_TD_BUSY" block=TD event=1 descr="TD is processing or waiting for data. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TD_TC_STALL" block=TD event=15 descr="TD is stalled waiting for TC data."
></metric>
<metric
name="TD_RESERVED_18" block=TD event=18 descr="RESERVED_18"
></metric>
<metric
name="TD_LOAD_WAVEFRONT" block=TD event=25 descr="Count the wavefronts with opcode = load, include atomics and store."
></metric>
<metric
name="TD_ATOMIC_WAVEFRONT" block=TD event=26 descr="Count the wavefronts with opcode = atomic."
></metric>
<metric
name="TD_STORE_WAVEFRONT" block=TD event=27 descr="Count the wavefronts with opcode = store."
></metric>
<metric
name="TD_COALESCABLE_WAVEFRONT" block=TD event=32 descr="Count wavefronts that TA finds coalescable."
></metric>
</gfx908>
@@ -1,163 +0,0 @@
#include "gfx908_metrics.xml"
<gfx9_expr>
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) descr="TA block is busy. Average over TA instances."></metric>
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) descr="TA block is busy. Max over TA instances."></metric>
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) descr="TA block is busy. Min over TA instances."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="Number of flat opcode reads processed by the TA. Sum over TA instances."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="Number of flat opcode writes processed by the TA. Sum over TA instances."></metric>
<metric name="TCC_BUSY_avr" expr=avr(TCC_BUSY,16) descr="TCC_BUSY avr over all memory channels."></metric>
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,16) descr="TCC_REQ sum over all memory channels."></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,16) descr="Number of cache hits. Sum over TCC instances."></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,16) descr="Number of cache misses. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,16) descr="Number of 32-byte TCC/EA read requests. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,16) descr="Number of TCC/EA read requests (either 32-byte or 64-byte). Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,16) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,16) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,16) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr=((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024 descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr=TCC_EA_WRREQ_64B_sum*2+(TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum) descr="The total number of 32-byte effective memory writes."></metric>
#xlu - TA
<metric name="TA_TA_BUSY_sum" expr=sum(TA_TA_BUSY,16) descr="."></metric>
<metric name="TA_TOTAL_WAVEFRONTS_sum" expr=sum(TA_TOTAL_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_ADDR_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TC_CYCLES,16) descr="."></metric>
<metric name="TA_ADDR_STALLED_BY_TD_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TD_CYCLES,16) descr="."></metric>
<metric name="TA_DATA_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_DATA_STALLED_BY_TC_CYCLES,16) descr="."></metric>
<metric name="TA_FLAT_WAVEFRONTS_sum" expr=sum(TA_FLAT_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_FLAT_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_FLAT_ATOMIC_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_READ_WAVEFRONTS_sum" expr=sum(TA_BUFFER_READ_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_WRITE_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WRITE_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_BUFFER_ATOMIC_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_TOTAL_CYCLES_sum" expr=sum(TA_BUFFER_TOTAL_CYCLES,16) descr="."></metric>
<metric name="TA_BUFFER_COALESCED_READ_CYCLES_sum" expr=sum(TA_BUFFER_COALESCED_READ_CYCLES,16) descr="."></metric>
<metric name="TA_BUFFER_COALESCED_WRITE_CYCLES_sum" expr=sum(TA_BUFFER_COALESCED_WRITE_CYCLES,16) descr="."></metric>
#xlu -TD
<metric name="TD_TD_BUSY_sum" expr=sum(TD_TD_BUSY,16) descr="."></metric>
<metric name="TD_TC_STALL_sum" expr=sum(TD_TC_STALL,16) descr="."></metric>
<metric name="TD_LOAD_WAVEFRONT_sum" expr=sum(TD_LOAD_WAVEFRONT,16) descr="."></metric>
<metric name="TD_ATOMIC_WAVEFRONT_sum" expr=sum(TD_ATOMIC_WAVEFRONT,16) descr="."></metric>
<metric name="TD_STORE_WAVEFRONT_sum" expr=sum(TD_STORE_WAVEFRONT,16) descr="."></metric>
<metric name="TD_COALESCABLE_WAVEFRONT_sum" expr=sum(TD_COALESCABLE_WAVEFRONT,16) descr="."></metric>
#xlu -TCP
<metric name="TCP_GATE_EN1_sum" expr=sum(TCP_GATE_EN1,16) descr="."></metric>
<metric name="TCP_GATE_EN2_sum" expr=sum(TCP_GATE_EN2,16) descr="."></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_sum" expr=sum(TCP_TCP_TA_DATA_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_TD_TCP_STALL_CYCLES_sum" expr=sum(TCP_TD_TCP_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_TCR_TCP_STALL_CYCLES_sum" expr=sum(TCP_TCR_TCP_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_READ_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_READ_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_WRITE_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_PENDING_STALL_CYCLES_sum" expr=sum(TCP_PENDING_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_VOLATILE_sum" expr=sum(TCP_VOLATILE,16) descr="."></metric>
<metric name="TCP_TOTAL_ACCESSES_sum" expr=sum(TCP_TOTAL_ACCESSES,16) descr="."></metric>
<metric name="TCP_TOTAL_READ_sum" expr=sum(TCP_TOTAL_READ,16) descr="."></metric>
<metric name="TCP_TOTAL_WRITE_sum" expr=sum(TCP_TOTAL_WRITE,16) descr="."></metric>
<metric name="TCP_TOTAL_ATOMIC_WITH_RET_sum" expr=sum(TCP_TOTAL_ATOMIC_WITH_RET,16) descr="."></metric>
<metric name="TCP_TOTAL_ATOMIC_WITHOUT_RET_sum" expr=sum(TCP_TOTAL_ATOMIC_WITHOUT_RET,16) descr="."></metric>
<metric name="TCP_TOTAL_WRITEBACK_INVALIDATES_sum" expr=sum(TCP_TOTAL_WRITEBACK_INVALIDATES,16) descr="."></metric>
<metric name="TCP_UTCL1_REQUEST_sum" expr=sum(TCP_UTCL1_REQUEST,16) descr="."></metric>
<metric name="TCP_UTCL1_TRANSLATION_MISS_sum" expr=sum(TCP_UTCL1_TRANSLATION_MISS,16) descr="."></metric>
<metric name="TCP_UTCL1_TRANSLATION_HIT_sum" expr=sum(TCP_UTCL1_TRANSLATION_HIT,16) descr="."></metric>
<metric name="TCP_UTCL1_PERMISSION_MISS_sum" expr=sum(TCP_UTCL1_PERMISSION_MISS,16) descr="."></metric>
<metric name="TCP_TOTAL_CACHE_ACCESSES_sum" expr=sum(TCP_TOTAL_CACHE_ACCESSES,16) descr="."></metric>
<metric name="TCP_TCP_LATENCY_sum" expr=sum(TCP_TCP_LATENCY,16) descr="."></metric>
<metric name="TCP_TA_TCP_STATE_READ_sum" expr=sum(TCP_TA_TCP_STATE_READ,16) descr="."></metric>
<metric name="TCP_TCC_READ_REQ_LATENCY_sum" expr=sum(TCP_TCC_READ_REQ_LATENCY,16) descr="."></metric>
<metric name="TCP_TCC_WRITE_REQ_LATENCY_sum" expr=sum(TCP_TCC_WRITE_REQ_LATENCY,16) descr="."></metric>
<metric name="TCP_TCC_READ_REQ_sum" expr=sum(TCP_TCC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_WRITE_REQ_sum" expr=sum(TCP_TCC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_ATOMIC_WITH_RET_REQ_sum" expr=sum(TCP_TCC_ATOMIC_WITH_RET_REQ,16) descr="."></metric>
<metric name="TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum" expr=sum(TCP_TCC_ATOMIC_WITHOUT_RET_REQ,16) descr="."></metric>
<metric name="TCP_TCC_NC_READ_REQ_sum" expr=sum(TCP_TCC_NC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_NC_WRITE_REQ_sum" expr=sum(TCP_TCC_NC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_NC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_NC_ATOMIC_REQ,16) descr="."></metric>
<metric name="TCP_TCC_UC_READ_REQ_sum" expr=sum(TCP_TCC_UC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_UC_WRITE_REQ_sum" expr=sum(TCP_TCC_UC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_UC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_UC_ATOMIC_REQ,16) descr="."></metric>
<metric name="TCP_TCC_CC_READ_REQ_sum" expr=sum(TCP_TCC_CC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_CC_WRITE_REQ_sum" expr=sum(TCP_TCC_CC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_CC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_CC_ATOMIC_REQ,16) descr="."></metric>
</gfx9_expr>
<gfx908_expr base="gfx9_expr">
<metric name="TCC_BUSY_avr" expr=avr(TCC_BUSY,32) descr="TCC_BUSY avr over all memory channels."></metric>
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,32) descr="TCC_REQ sum over all memory channels."></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,32) descr="Number of cache hits. Sum over TCC instances."></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,32) descr="Number of cache misses. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,32) descr="Number of 32-byte TCC/EA read requests. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,32) descr="Number of TCC/EA read requests (either 32-byte or 64-byte). Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,32) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,32) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,32) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
#xlu - TCP
<metric name="TCP_TCC_RW_READ_REQ_sum" expr=sum(TCP_TCC_RW_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_RW_WRITE_REQ_sum" expr=sum(TCP_TCC_RW_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_RW_ATOMIC_REQ_sum" expr=sum(TCP_TCC_RW_ATOMIC_REQ,16) descr="."></metric>
#xlu - TCC
<metric name="TCC_CYCLE_sum" expr=sum(TCC_CYCLE,32) descr="."></metric>
<metric name="TCC_BUSY_sum" expr=sum(TCC_BUSY,32) descr="."></metric>
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,32) descr="."></metric>
<metric name="TCC_STREAMING_REQ_sum" expr=sum(TCC_STREAMING_REQ,32) descr="."></metric>
<metric name="TCC_NC_REQ_sum" expr=sum(TCC_NC_REQ,32) descr="."></metric>
<metric name="TCC_UC_REQ_sum" expr=sum(TCC_UC_REQ,32) descr="."></metric>
<metric name="TCC_CC_REQ_sum" expr=sum(TCC_CC_REQ,32) descr="."></metric>
<metric name="TCC_RW_REQ_sum" expr=sum(TCC_RW_REQ,32) descr="."></metric>
<metric name="TCC_PROBE_sum" expr=sum(TCC_PROBE,32) descr="."></metric>
<metric name="TCC_PROBE_ALL_sum" expr=sum(TCC_PROBE_ALL,32) descr="."></metric>
<metric name="TCC_READ_sum" expr=sum(TCC_READ,32) descr="."></metric>
<metric name="TCC_WRITE_sum" expr=sum(TCC_WRITE,32) descr="."></metric>
<metric name="TCC_ATOMIC_sum" expr=sum(TCC_ATOMIC,32) descr="."></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,32) descr="."></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,32) descr="."></metric>
<metric name="TCC_TAG_STALL_sum" expr=sum(TCC_TAG_STALL,32) descr="."></metric>
<metric name="TCC_WRITEBACK_sum" expr=sum(TCC_WRITEBACK,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,32) descr="."></metric>
<metric name="TCC_EA_WR_UNCACHED_32B_sum" expr=sum(TCC_EA_WR_UNCACHED_32B,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_STALL_sum" expr=sum(TCC_EA_WRREQ_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_IO_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_IO_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_GMI_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_GMI_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_DRAM_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_TOO_MANY_EA_WRREQS_STALL_sum" expr=sum(TCC_TOO_MANY_EA_WRREQS_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_LEVEL_sum" expr=sum(TCC_EA_WRREQ_LEVEL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_LEVEL_sum" expr=sum(TCC_EA_RDREQ_LEVEL,32) descr="."></metric>
<metric name="TCC_EA_ATOMIC_sum" expr=sum(TCC_EA_ATOMIC,32) descr="."></metric>
<metric name="TCC_EA_ATOMIC_LEVEL_sum" expr=sum(TCC_EA_ATOMIC_LEVEL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,32) descr="."></metric>
<metric name="TCC_EA_RD_UNCACHED_32B_sum" expr=sum(TCC_EA_RD_UNCACHED_32B,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_IO_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_IO_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_GMI_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_GMI_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_DRAM_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_NORMAL_WRITEBACK_sum" expr=sum(TCC_NORMAL_WRITEBACK,32) descr="."></metric>
<metric name="TCC_ALL_TC_OP_WB_WRITEBACK_sum" expr=sum(TCC_ALL_TC_OP_WB_WRITEBACK,32) descr="."></metric>
<metric name="TCC_NORMAL_EVICT_sum" expr=sum(TCC_NORMAL_EVICT,32) descr="."></metric>
<metric name="TCC_ALL_TC_OP_INV_EVICT_sum" expr=sum(TCC_ALL_TC_OP_INV_EVICT,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_DRAM_sum" expr=sum(TCC_EA_RDREQ_DRAM,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_DRAM_sum" expr=sum(TCC_EA_WRREQ_DRAM,32) descr="."></metric>
</gfx908_expr>
<gfx9 base="gfx9_expr"></gfx9>
<gfx908 base="gfx908_expr"> </gfx908>
@@ -49,14 +49,12 @@ from utils.parser import BUILD_IN_VARS, SUPPORTED_DENOM
from utils.specs import MachineSpecs
from utils.utils import (
add_counter_extra_config_input_yaml,
capture_subprocess_output,
convert_metric_id_to_panel_info,
detect_rocprof,
get_submodules,
is_tcc_channel_counter,
mibench,
parse_sets_yaml,
using_v3,
)
@@ -370,8 +368,7 @@ class OmniSoC_Base:
# Handle TCC channel counters: if hw_counter_matches has elems ending with '['
# Expand and interleave the TCC channel counters
# e.g. TCC_HIT[0] TCC_ATOMIC[0] ... TCC_HIT[1] TCC_ATOMIC[1] ...
num_xcd_for_pmc_file = int(self._mspec.num_xcd) if using_v3() else 1
num_xcd_for_pmc_file = int(self._mspec.num_xcd)
for counter_name in counters.copy():
if counter_name.startswith("TCC") and counter_name.endswith("["):
counters.remove(counter_name)
@@ -388,18 +385,6 @@ class OmniSoC_Base:
"""Filter default performance counter set based on user arguments"""
counters, filter_blocks = self.detect_counters()
if not using_v3():
# Counters not supported in rocprof v1 / v2
counters = counters - {
"SQ_INSTS_VALU_MFMA_F8",
"SQ_INSTS_VALU_MFMA_MOPS_F8",
"SQC_DCACHE_INFLIGHT_LEVEL",
"SQC_ICACHE_INFLIGHT_LEVEL",
"SQ_VMEM_WR_TA_DATA_FIFO_FULL",
"SQ_VMEM_TA_ADDR_FIFO_FULL",
"SQ_VMEM_TA_CMD_FIFO_FULL",
}
# TCP_TCP_LATENCY_sum not supported for MI300 (gfx940, gfx941, gfx942)
if self.__arch in ("gfx940", "gfx941", "gfx942"):
counters = counters - {"TCP_TCP_LATENCY_sum"}
@@ -467,84 +452,52 @@ class OmniSoC_Base:
if rocprof_cmd != "rocprofiler-sdk":
console_warning(
"rocprof v1/v2/v3 interfaces will be removed in favor of "
"rocprofiler-sdk interface in a future release. To use "
"rocprofiler-sdk, set ROCPROF to 'rocprofiler-sdk' and "
"optionally provide the path to librocprofiler-sdk.so via "
"--rocprofiler-sdk-library-path."
"rocprofv3 interface is deprecated and will be removed "
"in a future release."
)
rocprof_counters: set[str] = set()
if rocprof_cmd.endswith("rocprof"):
for list_type in ["--list-basic", "--list-derived"]:
command = [rocprof_cmd, list_type]
success, output = capture_subprocess_output(
command, enable_logging=False
)
# return code should be 1 so success should be False
if success:
console_error(
"Failed to list rocprof supported counters using command: "
f"{command}"
)
for line in output.splitlines():
if "gpu-agent" in line:
counters, _ = self.parse_counters_text(
line.split(":")[1].strip()
)
rocprof_counters.update(counters)
elif rocprof_cmd.endswith("rocprofv2"):
command = [rocprof_cmd, "--list-counters"]
success, output = capture_subprocess_output(command, enable_logging=False)
# return code should be 1 so success should be False
if success:
console_error(
"Failed to list rocprof supported counters using command: "
f"{command}"
)
for line in output.splitlines():
if "gfx" in line:
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
rocprof_counters.update(counters)
elif rocprof_cmd.endswith("rocprofv3") or rocprof_cmd == "rocprofiler-sdk":
# Point to counter definition
old_rocprofiler_metrics_path = os.environ.get("ROCPROFILER_METRICS_PATH")
os.environ["ROCPROFILER_METRICS_PATH"] = str(
config.rocprof_compute_home / "rocprof_compute_soc" / "profile_configs"
)
sys.path.append(
str(Path(args.rocprofiler_sdk_library_path).parent.parent / "bin")
)
from rocprofv3_avail_module import avail
avail.loadLibrary.libname = str(
Path(args.rocprofiler_sdk_library_path).parent.parent
/ "lib"
/ "rocprofiler-sdk"
/ "librocprofv3-list-avail.so"
)
counters = avail.get_counters()
rocprof_counters = {
counter.name
for counter in counters[list(counters.keys())[0]]
if hasattr(counter, "block") or hasattr(counter, "expression")
}
# Reset env. var.
if old_rocprofiler_metrics_path is None:
del os.environ["ROCPROFILER_METRICS_PATH"]
else:
os.environ["ROCPROFILER_METRICS_PATH"] = old_rocprofiler_metrics_path
else:
if not (
str(rocprof_cmd).endswith("rocprofv3")
or str(rocprof_cmd) == "rocprofiler-sdk"
):
console_error(
f"Incompatible profiler: {rocprof_cmd}. Supported profilers include: "
f"Incompatible profiler: {rocprof_cmd}. "
"Supported profilers include: "
f"{get_submodules('rocprof_compute_profile')}"
)
# Point to counter definition
old_rocprofiler_metrics_path = os.environ.get("ROCPROFILER_METRICS_PATH")
os.environ["ROCPROFILER_METRICS_PATH"] = str(
config.rocprof_compute_home / "rocprof_compute_soc" / "profile_configs"
)
sys.path.append(
str(
Path(self.get_args().rocprofiler_sdk_library_path).parent.parent / "bin"
)
)
from rocprofv3_avail_module import avail
avail.loadLibrary.libname = str(
Path(self.get_args().rocprofiler_sdk_library_path).parent.parent
/ "lib"
/ "rocprofiler-sdk"
/ "librocprofv3-list-avail.so"
)
counters = avail.get_counters()
rocprof_counters = {
counter.name
for counter in counters[list(counters.keys())[0]]
if hasattr(counter, "block") or hasattr(counter, "expression")
}
# Reset env. var.
if old_rocprofiler_metrics_path is None:
del os.environ["ROCPROFILER_METRICS_PATH"]
else:
os.environ["ROCPROFILER_METRICS_PATH"] = old_rocprofiler_metrics_path
return rocprof_counters
@demarcate
@@ -600,14 +553,7 @@ class OmniSoC_Base:
CounterFile(counter + ".txt", self.__perfmon_config)
)
output_files[-1].add(counter)
if using_v3():
# v3 does not support SQ_ACCUM_PREV_HIRES. Use custom counters
# defined in counter_defs.yaml that utilize accumulate(),
# with _ACCUM suffix.
output_files[-1].add(f"{counter}_ACCUM")
else:
output_files[-1].add("SQ_ACCUM_PREV_HIRES")
output_files[-1].add(f"{counter}_ACCUM")
accu_file_count += 1
file_count = 0
@@ -708,12 +654,12 @@ class OmniSoC_Base:
for ctr in f.blocks[block_name].elements
]:
pmc.append(ctr)
if using_v3() and is_tcc_channel_counter(ctr):
# Add TCC channel counters definitions
if is_tcc_channel_counter(ctr):
counter_name = ctr.split("[")[0]
idx = int(ctr.split("[")[1].split("]")[0])
xcd_idx = idx // int(self._mspec.l2_banks)
channel_idx = idx % int(self._mspec.l2_banks)
expression = (
f"select({counter_name},"
f"[DIMENSION_XCC=[{xcd_idx}], "
@@ -743,16 +689,6 @@ class OmniSoC_Base:
with open(file_name_yaml, "w") as fp:
fp.write(yaml.dump(counter_def, sort_keys=False))
# Add a timestamp file
# TODO: Does v3 need this?
if not using_v3():
timestamp_file = workload_perfmon_dir / "timestamps.txt"
with open(timestamp_file, "w") as fd:
fd.write("pmc:\n\n")
fd.write("gpu:\n")
fd.write("range:\n")
fd.write("kernel:\n")
# ----------------------------------------------------
# Required methods to be implemented by child classes
# ----------------------------------------------------
@@ -36,8 +36,7 @@ class gfx908_soc(OmniSoC_Base):
def __init__(self, args: argparse.Namespace, mspec: MachineSpecs) -> None:
super().__init__(args, mspec)
self.set_arch("gfx908")
self.set_compatible_profilers(["rocprofv1", "rocprofv3", "rocprofiler-sdk"])
self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx908"))
@@ -37,8 +37,6 @@ class gfx90a_soc(OmniSoC_Base):
super().__init__(args, mspec)
self.set_arch("gfx90a")
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
@@ -37,8 +37,6 @@ class gfx940_soc(OmniSoC_Base):
super().__init__(args, mspec)
self.set_arch("gfx940")
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
@@ -37,8 +37,6 @@ class gfx941_soc(OmniSoC_Base):
super().__init__(args, mspec)
self.set_arch("gfx941")
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
@@ -37,8 +37,6 @@ class gfx942_soc(OmniSoC_Base):
super().__init__(args, mspec)
self.set_arch("gfx942")
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
+78 -251
View File
@@ -58,7 +58,6 @@ from utils.logger import (
console_warning,
demarcate,
)
from utils.mi_gpu_spec import mi_gpu_specs
rocprof_cmd = ""
rocprof_args = ""
@@ -144,40 +143,6 @@ def add_counter_extra_config_input_yaml(
return data
def extract_counter_info_extra_config_input_yaml(
    data: dict[str, Any], counter_name: str
) -> Optional[dict]:
    """
    Extract the full counter dictionary from 'data' for the given counter_name.

    The counters are read from ``data["rocprofiler-sdk"]["counters"]``. A
    missing or empty (``None``) section at either level is treated as
    "no counters defined" instead of raising.

    Args:
        data (dict): The source YAML dict.
        counter_name (str): The counter to find.

    Returns:
        Optional[dict]: The first counter dict whose "name" equals
        counter_name, else None.
    """
    # `or` (rather than a .get() default) also covers keys that are present
    # but hold None, which is what an empty YAML section loads as.
    sdk_section = data.get("rocprofiler-sdk") or {}
    counters = sdk_section.get("counters") or []
    return next(
        (
            counter
            for counter in counters
            # Ignore malformed (non-mapping) entries instead of crashing on .get
            if isinstance(counter, dict) and counter.get("name") == counter_name
        ),
        None,
    )
def using_v1() -> bool:
    """Return True when the ROCPROF environment variable ends with "rocprof".

    An unset ROCPROF yields False. Values such as "rocprofv2" or "rocprofv3"
    also yield False because they do not end with the bare "rocprof" suffix.
    """
    # Single lookup; the empty-string default never matches the suffix.
    return os.environ.get("ROCPROF", "").endswith("rocprof")
def using_v3() -> bool:
    """Return True when the v3-style profiler interface is selected.

    That is the case when the ROCPROF environment variable is unset, when it
    ends with "rocprofv3" (bare name or full path), or when it is exactly
    "rocprofiler-sdk".
    """
    rocprof = os.environ.get("ROCPROF")
    if rocprof is None:
        # No explicit choice made: treated as v3.
        return True
    return rocprof.endswith("rocprofv3") or rocprof == "rocprofiler-sdk"
def get_version(rocprof_compute_home: Path) -> dict[str, str]:
"""Return ROCm Compute Profiler versioning info"""
@@ -240,7 +205,8 @@ def detect_rocprof(args: argparse.Namespace) -> str:
"""Detect loaded rocprof version. Resolve path and set cmd globally."""
global rocprof_cmd
if os.environ.get("ROCPROF") == "rocprofiler-sdk":
# Default is rocprofiler-sdk
if os.environ.get("ROCPROF", "rocprofiler-sdk") == "rocprofiler-sdk":
if not Path(args.rocprofiler_sdk_library_path).exists():
console_error(
"Could not find rocprofiler-sdk library at "
@@ -249,45 +215,22 @@ def detect_rocprof(args: argparse.Namespace) -> str:
rocprof_cmd = "rocprofiler-sdk"
console_debug(f"rocprof_cmd is {rocprof_cmd}")
console_debug(f"rocprofiler_sdk_path is {args.rocprofiler_sdk_library_path}")
return rocprof_cmd
# detect rocprof
if not "ROCPROF" in os.environ.keys():
# default rocprof
rocprof_cmd = "rocprofv3"
else:
# If ROCPROF is not set to rocprofiler-sdk
rocprof_cmd = os.environ["ROCPROF"]
# resolve rocprof path
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
rocprof_cmd = "rocprofv3"
console_warning(
f"Unable to resolve path to {rocprof_cmd} binary. Reverting to default."
)
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
console_error(
"Please verify installation or set ROCPROF environment variable "
"with full path."
f"Unable to resolve path to {rocprof_cmd} binary. "
"Please verify installation or set ROCPROF "
"environment variable with full path."
)
else:
# Resolve any sym links in file path
rocprof_path = str(Path(rocprof_path.rstrip("\n")).resolve())
console_debug(f"rocprof_cmd is {str(rocprof_cmd)}")
console_debug(f"ROC Profiler: {rocprof_path}")
console_debug(f"rocprof_cmd is {rocprof_cmd}")
return rocprof_cmd
# TODO: v1/v2 function, to be removed
def store_app_cmd(args: argparse.Namespace) -> None:
    """Store `args` in the module-level `rocprof_args` global.

    Args:
        args: Value to keep; it is assigned as-is, without copying.
    """
    # Rebinds the module global rather than creating a function-local name.
    global rocprof_args
    rocprof_args = args
@demarcate
def capture_subprocess_output(
subprocess_args: list[str],
new_env: Optional[dict[str, str]] = None,
@@ -766,47 +709,40 @@ def run_prof(
default_options = ["-i", fname]
options = default_options + cast(list[str], profiler_options)
if using_v3():
if rocprof_cmd == "rocprofiler-sdk":
options["ROCPROF_AGENT_INDEX"] = "absolute"
else:
options = ["-A", "absolute"] + options
if rocprof_cmd == "rocprofiler-sdk":
options["ROCPROF_AGENT_INDEX"] = "absolute"
else:
if is_mode_live_attach:
console_error(
"The live attach/detach only supports rocprofv3 or rocprofiler-sdk"
)
options = ["-A", "absolute"] + options
new_env = os.environ.copy()
if using_v3():
# Counter definitions
with open(
config.rocprof_compute_home
/ "rocprof_compute_soc"
/ "profile_configs"
/ "counter_defs.yaml",
) as file:
counter_defs = yaml.safe_load(file)
# Extra counter definitions
if fpath.with_suffix(".yaml").exists():
with open(fpath.with_suffix(".yaml")) as file:
counter_defs["rocprofiler-sdk"]["counters"].extend(
yaml.safe_load(file)["rocprofiler-sdk"]["counters"]
)
# Write counter definitions to a temporary file
tmpfile_path = (
Path(tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp"))
/ "counter_defs.yaml"
)
with open(tmpfile_path, "w") as tmpfile:
yaml.dump(counter_defs, tmpfile, default_flow_style=False, sort_keys=False)
# Set counter definitions
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
console_debug(
"Adding env var for counter definitions: "
f"ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
)
# Counter definitions
with open(
config.rocprof_compute_home
/ "rocprof_compute_soc"
/ "profile_configs"
/ "counter_defs.yaml",
) as file:
counter_defs = yaml.safe_load(file)
# Extra counter definitions
if Path(fname).with_suffix(".yaml").exists():
with open(Path(fname).with_suffix(".yaml")) as file:
counter_defs["rocprofiler-sdk"]["counters"].extend(
yaml.safe_load(file)["rocprofiler-sdk"]["counters"]
)
# Write counter definitions to a temporary file
tmpfile_path = (
Path(tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp"))
/ "counter_defs.yaml"
)
with open(tmpfile_path, "w") as tmpfile:
yaml.dump(counter_defs, tmpfile, default_flow_style=False, sort_keys=False)
# Set counter definitions
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
console_debug(
"Adding env var for counter definitions: "
f"ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
)
# set required env var for >= mi300
if mspec.gpu_model.lower() not in (
@@ -910,92 +846,59 @@ def run_prof(
results_files: list[str] = []
if format_rocprof_output == "rocpd":
if rocprof_cmd == "rocprofiler-sdk" or rocprof_cmd.endswith("v3"):
# Write results_fbase.csv
rocpd_data.convert_db_to_csv(
glob.glob(f"{workload_dir}/out/pmc_1/*/*.db")[0],
f"{workload_dir}/results_{fbase}.csv",
# Write results_fbase.csv
rocpd_data.convert_db_to_csv(
glob.glob(workload_dir + "/out/pmc_1/*/*.db")[0],
workload_dir + f"/results_{fbase}.csv",
)
if retain_rocpd_output:
shutil.copyfile(
glob.glob(workload_dir + "/out/pmc_1/*/*.db")[0],
workload_dir + "/" + fbase + ".db",
)
if retain_rocpd_output:
shutil.copyfile(
glob.glob(f"{workload_dir}/out/pmc_1/*/*.db")[0],
f"{workload_dir}/{fbase}.db",
)
console_warning(
f"Retaining large raw rocpd database: {workload_dir}/{fbase}.db"
)
# Remove temp directory
shutil.rmtree(f"{workload_dir}/out")
return
else:
console_error(
"rocpd output format is only supported with "
"rocprofiler-sdk or rocprofv3."
console_warning(
f"Retaining large raw rocpd database: {workload_dir}/{fbase}.db"
)
elif rocprof_cmd.endswith("v2"):
# rocprofv2 has separate csv files for each process
results_files = glob.glob(f"{workload_dir}/out/pmc_1/results_*.csv")
# Remove temp directory
shutil.rmtree(workload_dir + "/" + "out")
return
if len(results_files) == 0:
return
# rocprofv3 requires additional processing for each process
results_files = process_rocprofv3_output(
format_rocprof_output, workload_dir, is_timestamps
)
# Combine results into single CSV file
combined_results = pd.concat(
[pd.read_csv(f) for f in results_files], ignore_index=True
)
# Overwrite column to ensure unique IDs.
combined_results["Dispatch_ID"] = range(0, len(combined_results))
combined_results.to_csv(
f"{workload_dir}/out/pmc_1/results_{fbase}.csv", index=False
)
elif rocprof_cmd.endswith("v3") or rocprof_cmd == "rocprofiler-sdk":
# rocprofv3 requires additional processing for each process
results_files = process_rocprofv3_output(
format_rocprof_output, workload_dir, is_timestamps
)
if rocprof_cmd == "rocprofiler-sdk":
if rocprof_cmd == "rocprofiler-sdk":
# TODO: as rocprofv3 --kokkos-trace feature improves,
# rocprof-compute should make updates accordingly
if "ROCPROF_HIP_RUNTIME_API_TRACE" in options:
process_hip_trace_output(workload_dir, fbase)
else:
if "--kokkos-trace" in options:
# TODO: as rocprofv3 --kokkos-trace feature improves,
# rocprof-compute should make updates accordingly
if "ROCPROF_HIP_RUNTIME_API_TRACE" in options:
process_hip_trace_output(workload_dir, fbase)
else:
if "--kokkos-trace" in options:
# TODO: as rocprofv3 --kokkos-trace feature improves,
# rocprof-compute should make updates accordingly
process_kokkos_trace_output(workload_dir, fbase)
elif "--hip-trace" in options:
process_hip_trace_output(workload_dir, fbase)
process_kokkos_trace_output(workload_dir, fbase)
elif "--hip-trace" in options:
process_hip_trace_output(workload_dir, fbase)
if not results_files:
console_warning(
f"Cannot write results for {fbase}.csv due to no counter "
"csv files generated."
)
return
# Combine results into single CSV file
# Combine results into single CSV file
if results_files:
combined_results = pd.concat(
[pd.read_csv(f) for f in results_files], ignore_index=True
)
# Overwrite column to ensure unique IDs.
combined_results["Dispatch_ID"] = range(0, len(combined_results))
combined_results.to_csv(
f"{workload_dir}/out/pmc_1/results_{fbase}.csv", index=False
else:
console_warning(
f"Cannot write results for {fbase}.csv due to no counter "
"csv files generated."
)
return
if not using_v3() and not using_v1():
# flatten tcc for applicable mi300 input
f = f"{workload_dir}/out/pmc_1/results_{fbase}.csv"
xcds = mi_gpu_specs.get_num_xcds(
mspec.gpu_arch, mspec.gpu_model, mspec.compute_partition
)
df = flatten_tcc_info_across_xcds(f, xcds, int(mspec.l2_banks))
df.to_csv(f, index=False)
# Overwrite column to ensure unique IDs.
combined_results["Dispatch_ID"] = range(0, len(combined_results))
combined_results.to_csv(
workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False
)
if Path(f"{workload_dir}/out").exists():
# copy and remove out directory if needed
@@ -1226,26 +1129,6 @@ def process_hip_trace_output(workload_dir: str, fbase: str) -> None:
)
def replace_timestamps(workload_dir: str) -> None:
    """Copy the timestamp columns of timestamps.csv into the other CSV outputs.

    Does nothing when `<workload_dir>/timestamps.csv` is not a file. When that
    file lacks either the "Start_Timestamp" or the "End_Timestamp" column, a
    warning is emitted and no file is modified. Otherwise every `*.csv`
    directly inside `workload_dir`, except `sysinfo.csv`, is rewritten in
    place with both columns taken from timestamps.csv.

    Args:
        workload_dir: Directory holding the profiling CSV outputs.
    """
    stamps_file = Path(workload_dir) / "timestamps.csv"
    if not stamps_file.is_file():
        return

    stamps = pd.read_csv(stamps_file)
    stamp_columns = ("Start_Timestamp", "End_Timestamp")
    if any(column not in stamps.columns for column in stamp_columns):
        console_warning(
            "Incomplete profiling data detected. Unable to update timestamps.\n"
        )
        return

    # Update timestamps for all *.csv output files
    for csv_path in glob.glob(f"{workload_dir}/*.csv"):
        if Path(csv_path).name == "sysinfo.csv":
            continue
        perf = pd.read_csv(csv_path)
        for column in stamp_columns:
            perf[column] = stamps[column]
        perf.to_csv(csv_path, index=False)
@demarcate
def gen_sysinfo(
workload_name: str,
@@ -1383,62 +1266,6 @@ def mibench(args: argparse.Namespace, mspec: Any) -> None: # noqa: ANN401
subprocess.run(my_args, check=True)
def flatten_tcc_info_across_xcds(
    file: str, xcds: int, tcc_channel_per_xcd: int
) -> pd.DataFrame:
    """
    Flatten TCC per channel counters across all XCDs in partition.

    The CSV is read as groups of `xcds` consecutive rows, one group per
    dispatch. Each group becomes a single output row: the non-TCC values of
    the group's first row, followed by the TCC values of every row of the
    group in order. The TCC column names are repeated once per XCD, with each
    bracketed channel index shifted by `xcd * tcc_channel_per_xcd`.

    NB: This func highly depends on the default behavior of rocprofv2 on MI300,
        which might be broken anytime in the future!

    Args:
        file: Path of the CSV file to read.
        xcds: Number of consecutive input rows that make up one dispatch.
        tcc_channel_per_xcd: Offset added to the channel index per XCD.

    Returns:
        A new DataFrame holding one row per dispatch.

    Raises:
        ValueError: If the number of rows is not a multiple of `xcds`, so the
            last dispatch has fewer TCC values than flattened columns.
    """
    df_orig = pd.read_csv(file)

    ### prepare column headers
    tcc_cols = [c for c in df_orig.columns if "TCC" in c]
    non_tcc_cols = [c for c in df_orig.columns if "TCC" not in c]

    # Matches a bracketed channel index such as "[3]"; compiled once.
    channel_index = re.compile(r"\[(\d+)\]")

    def shift_channels(col: str, xcd: int) -> str:
        """Return `col` with every bracketed index moved into XCD `xcd`'s range."""
        offset = xcd * tcc_channel_per_xcd
        return channel_index.sub(lambda m: f"[{int(m.group(1)) + offset}]", col)

    cols = list(non_tcc_cols)
    for xcd in range(xcds):
        cols.extend(shift_channels(col, xcd) for col in tcc_cols)

    ### Rearrange data with extended column names
    rows = []
    for start in range(0, len(df_orig.index), xcds):
        # assume the leading non-TCC columns are the same for all XCCs
        flat_row = df_orig.iloc[start][non_tcc_cols].tolist()
        # extract all tcc from one dispatch
        # NB: assuming default contiguous order might not be safe!
        tcc_block = df_orig.iloc[start : start + xcds][tcc_cols]
        for _, tcc_row in tcc_block.iterrows():
            flat_row.extend(tcc_row.tolist())
        if len(flat_row) != len(cols):
            # An incomplete trailing group would otherwise be padded silently
            # by the DataFrame constructor.
            raise ValueError(
                f"Cannot flatten rows starting at {start}: got {len(flat_row)} "
                f"values for {len(cols)} columns"
            )
        rows.append(flat_row)

    # Build the frame once instead of growing it one row at a time.
    return pd.DataFrame(rows, columns=cols)
def get_submodules(package_name: str) -> list[str]:
"""List all submodules for a target package"""
import importlib
@@ -108,7 +108,6 @@ ALL_CSVS_MI200 = sorted([
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"sysinfo.csv",
"timestamps.csv",
])
ALL_CSVS_MI300 = sorted([
"SQC_DCACHE_INFLIGHT_LEVEL.csv",
@@ -126,7 +125,6 @@ ALL_CSVS_MI300 = sorted([
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"sysinfo.csv",
"timestamps.csv",
])
ALL_CSVS_MI350 = sorted([
"SQC_DCACHE_INFLIGHT_LEVEL.csv",
@@ -155,13 +153,13 @@ ALL_CSVS_MI350 = sorted([
ROOF_ONLY_FILES = sorted([
"empirRoof_gpu-0_FP32.pdf",
"kernelName_legend.pdf",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"roofline.csv",
"sysinfo.csv",
"timestamps.csv",
])
PC_SAMPLING_HOST_TRAP_FILES = sorted([
@@ -364,18 +362,7 @@ def gpu_soc():
soc = gpu_soc()
os.environ["ROCPROF"] = "rocprofv3"
def using_v3():
    """Return True when the v3-style profiler interface is selected.

    That is the case when the ROCPROF environment variable is unset, when it
    ends with "rocprofv3" (bare name or full path), or when it is exactly
    "rocprofiler-sdk".
    """
    rocprof = os.environ.get("ROCPROF")
    if rocprof is None:
        # No explicit choice made: treated as v3.
        return True
    return rocprof.endswith("rocprofv3") or rocprof == "rocprofiler-sdk"
os.environ["ROCPROF"] = "rocprofiler-sdk"
Baseline_dir = str(Path("tests/workloads/vcopy/" + soc).resolve())
@@ -568,19 +555,11 @@ def test_path(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"This test is not supported for {soc}")
assert 0
@@ -628,15 +607,7 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
else:
expected_files = (
[f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
if using_v3()
else ROOF_ONLY_FILES
) + ["kernelName_legend.pdf"]
assert sorted(list(file_dict.keys())) == sorted(expected_files)
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -678,12 +649,7 @@ def test_roof_multiple_data_types(binary_handler_profile_rocprof_compute):
assert os.path.exists(f"{workload_dir}/pmc_perf.csv")
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
expected_files = (
[f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
if using_v3()
else ROOF_ONLY_FILES
) + ["kernelName_legend.pdf"]
assert sorted(list(file_dict.keys())) == sorted(expected_files)
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
else:
pass
finally:
@@ -1200,19 +1166,11 @@ def test_device_filter(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1238,19 +1196,11 @@ def test_kernel(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1274,19 +1224,11 @@ def test_dispatch_0(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1314,19 +1256,11 @@ def test_dispatch_0_1(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1351,19 +1285,11 @@ def test_dispatch_2(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1391,19 +1317,11 @@ def test_join_type_grid(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1428,19 +1346,11 @@ def test_join_type_kernel(binary_handler_profile_rocprof_compute):
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
elif soc == "MI200":
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI200 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI200
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200
elif "MI300" in soc:
assert sorted(list(file_dict.keys())) == sorted(
[f for f in ALL_CSVS_MI300 if f != "timestamps.csv"]
if using_v3()
else ALL_CSVS_MI300
)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300
elif "MI350" in soc:
assert sorted(list(file_dict.keys())) == sorted(ALL_CSVS_MI350)
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350
else:
print(f"Testing isn't supported yet for {soc}")
assert 0
@@ -1473,12 +1383,11 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
assert (
sorted(list(file_dict.keys()))
== [f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
if using_v3()
else ROOF_ONLY_FILES
)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
validate(
inspect.stack()[0][3],
@@ -1508,12 +1417,10 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
assert (
sorted(list(file_dict.keys()))
== [f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
if using_v3()
else ROOF_ONLY_FILES
)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
validate(
inspect.stack()[0][3],
@@ -1543,12 +1450,10 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
assert (
sorted(list(file_dict.keys()))
== [f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
if using_v3()
else ROOF_ONLY_FILES
)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
validate(
inspect.stack()[0][3],
@@ -1578,12 +1483,10 @@ def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
assert (
sorted(list(file_dict.keys()))
== [f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
if using_v3()
else ROOF_ONLY_FILES
)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
validate(
inspect.stack()[0][3],
@@ -1873,10 +1776,6 @@ def test_pc_sampling_stochastic(binary_handler_profile_rocprof_compute):
@pytest.mark.live_attach_detach
def test_live_attach_detach_block(binary_handler_profile_rocprof_compute):
if not using_v3():
assert True
return
options = ["--block", "3.1.1", "4.1.1", "5.1.1"]
workload_dir = test_utils.get_output_dir()
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"])
@@ -1930,10 +1829,6 @@ def test_live_attach_detach_block(binary_handler_profile_rocprof_compute):
def test_live_attach_detach_singlepath_launch_stats(
binary_handler_profile_rocprof_compute,
):
if not using_v3():
assert True
return
options = ["--set", "launch_stats"]
workload_dir = test_utils.get_output_dir()
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"])
+38 -716
View File
@@ -384,7 +384,7 @@ def test_detect_rocprof_env_rocprof_not_found(monkeypatch):
rocprofiler_sdk_library_path = "/fake/path"
# Set ROCPROF to 'rocprof'
monkeypatch.setenv("ROCPROF", "rocprof")
monkeypatch.setenv("ROCPROF", "rocprofv3")
# shutil.which returns None for 'rocprof'
monkeypatch.setattr("shutil.which", lambda cmd: None)
# Track calls to console_warning and console_error
@@ -403,7 +403,6 @@ def test_detect_rocprof_env_rocprof_not_found(monkeypatch):
with pytest.raises(RuntimeError, match="console_error called"):
utils_mod.detect_rocprof(DummyArgs())
assert any("Unable to resolve path to rocprofv3 binary" in w for w in warnings)
assert any(
"Please verify installation or set ROCPROF environment variable" in e
for e in errors
@@ -452,10 +451,7 @@ def test_detect_rocprof_env_not_set(monkeypatch):
rocprofiler_sdk_library_path = "/fake/path"
monkeypatch.delenv("ROCPROF", raising=False)
monkeypatch.setattr(
"shutil.which", lambda cmd: "/usr/bin/rocprofv3" if cmd == "rocprofv3" else None
)
monkeypatch.setattr("pathlib.Path.resolve", lambda self: self)
monkeypatch.setattr("pathlib.Path.exists", lambda _: True)
logs = []
monkeypatch.setattr(
"utils.utils.console_debug", lambda msg, *a, **k: logs.append(str(msg))
@@ -463,10 +459,10 @@ def test_detect_rocprof_env_not_set(monkeypatch):
import utils.utils as utils_mod
result = utils_mod.detect_rocprof(DummyArgs())
assert result == "rocprofv3"
assert result == "rocprofiler-sdk"
assert any(
"ROC Profiler: /usr/bin/rocprofv3" in log_entry
or "rocprof_cmd is rocprofv3" in log_entry
"rocprofiler_sdk_path is /fake/path" in log_entry
or "rocprof_cmd is rocprofiler-sdk" in log_entry
for log_entry in logs
)
@@ -2379,9 +2375,9 @@ def test_parse_text_file_not_found():
# =============================================================================
def test_run_prof_success_v2(tmp_path, monkeypatch):
def test_run_prof_success_v3(tmp_path, monkeypatch):
"""
Test run_prof with rocprofv2 successful execution.
Test run_prof with rocprofv3 successful execution.
Args:
tmp_path (Path): Temporary directory for test files.
@@ -2395,7 +2391,13 @@ def test_run_prof_success_v2(tmp_path, monkeypatch):
workload_dir = str(tmp_path / "workload")
os.makedirs(workload_dir + "/out/pmc_1", exist_ok=True)
csv_content = "Dispatch_ID,GPU_ID,Kernel_Name\n0,0,test_kernel"
csv_content = (
"Agent_Type,Node_Id,Wave_Front_Size,Correlation_Id,Dispatch_Id,Agent_Id,Queue_Id,Process_Id,Thread_Id,"
"Grid_Size,Kernel_Id,Kernel_Name,Workgroup_Size,LDS_Block_Size,"
"Scratch_Size,VGPR_Count,Accum_VGPR_Count,SGPR_Count,Start_Timestamp,"
"End_Timestamp,Counter_Name,Counter_Value\n"
"GPU,0,0,0,0,0,0,0,0,0,0,test_kernel,0,0,0,0,0,0,0,1,SQ_WAVES,100"
)
with open(workload_dir + "/out/pmc_1/results_0.csv", "w") as f:
f.write(csv_content)
@@ -2408,12 +2410,10 @@ def test_run_prof_success_v2(tmp_path, monkeypatch):
mspec = MockSpec()
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofv2")
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofv3")
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: False)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
monkeypatch.setattr(
@@ -2458,8 +2458,6 @@ def test_run_prof_success_v3_csv(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
monkeypatch.setattr(
@@ -2510,7 +2508,6 @@ def test_run_prof_success_rocprofiler_sdk(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
monkeypatch.setattr("utils.utils.parse_text", lambda f: ["SQ_WAVES"])
monkeypatch.setattr("utils.utils.process_rocprofv3_output", lambda *a, **k: [])
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
@@ -2554,8 +2551,6 @@ def test_run_prof_with_yaml_config(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr("utils.utils.process_rocprofv3_output", lambda *a, **k: [])
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
@@ -2597,8 +2592,6 @@ def test_run_prof_failure_subprocess(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (False, "error output")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
@@ -2651,8 +2644,6 @@ def test_run_prof_mi300_environment_setup(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", mock_capture_subprocess_output
)
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr("utils.utils.process_rocprofv3_output", lambda *a, **k: [])
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
@@ -2692,7 +2683,13 @@ def test_run_prof_timestamps_special_case(tmp_path, monkeypatch):
mspec = MockSpec()
csv_content = "Dispatch_ID,Start_Timestamp,End_Timestamp\n0,100,200"
csv_content = (
"Agent_Type,Node_Id,Wave_Front_Size,Correlation_Id,Dispatch_Id,Agent_Id,Queue_Id,Process_Id,Thread_Id,"
"Grid_Size,Kernel_Id,Kernel_Name,Workgroup_Size,LDS_Block_Size,"
"Scratch_Size,VGPR_Count,Accum_VGPR_Count,SGPR_Count,Start_Timestamp,"
"End_Timestamp,Counter_Name,Counter_Value\n"
"GPU,0,0,0,0,0,0,0,0,0,0,test_kernel,0,0,0,0,0,0,0,1,SQ_WAVES,100"
)
with open(workload_dir + "/kernel_trace.csv", "w") as f:
f.write(csv_content)
@@ -2702,8 +2699,6 @@ def test_run_prof_timestamps_special_case(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr(
"utils.utils.process_rocprofv3_output", lambda *a, **k: csv_files
)
@@ -2752,8 +2747,6 @@ def test_run_prof_no_results_files(tmp_path, monkeypatch):
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: False)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr("glob.glob", lambda pattern: []) # No files found
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
@@ -2790,46 +2783,31 @@ def test_run_prof_header_standardization(tmp_path, monkeypatch):
mspec = MockSpec()
csv_content = (
"KernelName,Index,grd,gpu-id,BeginNs,EndNs\ntest_kernel,0,64,0,100,200"
"Agent_Type,Node_Id,Wave_Front_Size,Correlation_Id,Dispatch_Id,Agent_Id,Queue_Id,Process_Id,Thread_Id,"
"Grid_Size,Kernel_Id,Kernel_Name,Workgroup_Size,LDS_Block_Size,"
"Scratch_Size,VGPR_Count,Accum_VGPR_Count,SGPR_Count,Start_Timestamp,"
"End_Timestamp,Counter_Name,Counter_Value\n"
"GPU,0,0,0,0,0,0,0,0,0,0,test_kernel,0,0,0,0,0,0,0,1,SQ_WAVES,100"
)
with open(workload_dir + "/out/pmc_1/results_test.csv", "w") as f:
f.write(csv_content)
old_headers_df = pd.DataFrame({
"KernelName": ["test_kernel"],
"Index": [0],
"grd": [64],
"gpu-id": [0],
"BeginNs": [100],
"EndNs": [200],
})
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofv2")
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofv3")
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: False)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr(
"glob.glob", lambda pattern: [workload_dir + "/out/pmc_1/results_test.csv"]
)
monkeypatch.setattr("utils.utils.console_debug", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_log", lambda *a, **k: None)
read_calls = []
def mock_read_csv(path, **kwargs):
read_calls.append(path)
return old_headers_df.copy()
write_calls = []
def mock_to_csv(self, path, **kwargs):
write_calls.append((path, self.columns.tolist()))
monkeypatch.setattr("pandas.read_csv", mock_read_csv)
monkeypatch.setattr("pandas.DataFrame.to_csv", mock_to_csv)
monkeypatch.setattr("pandas.concat", lambda dfs, **k: old_headers_df.copy())
import utils.utils as utils_mod
@@ -2837,9 +2815,8 @@ def test_run_prof_header_standardization(tmp_path, monkeypatch):
final_headers = write_calls[-1][1] if write_calls else []
assert "Kernel_Name" in final_headers
assert "Dispatch_ID" in final_headers
assert "Dispatch_Id" in final_headers
assert "Grid_Size" in final_headers
assert "GPU_ID" in final_headers
assert "Start_Timestamp" in final_headers
assert "End_Timestamp" in final_headers
@@ -2868,28 +2845,12 @@ def test_run_prof_tcc_flattening_mi300(tmp_path, monkeypatch):
mspec = MockSpec()
flatten_called = False
def mock_flatten_tcc_info_across_xcds(file, xcds, l2_banks):
nonlocal flatten_called
flatten_called = True
return pd.DataFrame({
"Dispatch_ID": [0],
"TCC_HIT[0]": [100],
"TCC_HIT[16]": [200],
})
# Mock functions
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofv2")
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofv3")
monkeypatch.setattr(
"utils.utils.capture_subprocess_output", lambda *a, **k: (True, "success")
)
monkeypatch.setattr("utils.utils.using_v3", lambda: False)
monkeypatch.setattr("utils.utils.using_v1", lambda: False)
monkeypatch.setattr(
"utils.utils.flatten_tcc_info_across_xcds", mock_flatten_tcc_info_across_xcds
)
monkeypatch.setattr("utils.utils.mi_gpu_specs.get_num_xcds", lambda *a: 2)
monkeypatch.setattr("utils.mi_gpu_spec.mi_gpu_specs.get_num_xcds", lambda *a: 2)
monkeypatch.setattr(
"glob.glob", lambda pattern: [workload_dir + "/results_test.csv"]
)
@@ -2907,8 +2868,6 @@ def test_run_prof_tcc_flattening_mi300(tmp_path, monkeypatch):
# Execute function
utils_mod.run_prof(str(fname), ["--arg"], workload_dir, mspec, logging.INFO, "csv")
assert flatten_called
import utils.utils as utils_mod # noqa
@@ -2934,7 +2893,6 @@ def test_run_prof_sdk_creates_new_env_copy(tmp_path, monkeypatch):
workload_dir_str = str(tmp_path)
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofiler-sdk")
monkeypatch.setattr("utils.utils.using_v3", lambda: False)
monkeypatch.setattr("utils.utils.process_rocprofv3_output", lambda *a, **k: [])
capture_subprocess_called_with_env = None
@@ -2957,10 +2915,12 @@ def test_run_prof_sdk_creates_new_env_copy(tmp_path, monkeypatch):
"utils.utils.parse_text", lambda *a, **k: ["COUNTER1", "COUNTER2"]
)
mock_fname_path_obj = mock.Mock(spec=Path)
mock_fname_path_obj = mock.MagicMock(spec=Path)
mock_fname_path_obj.stem = "counters"
mock_fname_path_obj.name = "counters.txt"
mock_fname_path_obj.with_suffix.return_value.exists.return_value = False
mock_fname_path_obj.__truediv__.return_value = mock.Mock(spec=Path)
mock_out_path_obj = mock.Mock(spec=Path)
mock_out_path_obj.exists.return_value = False
@@ -2999,6 +2959,7 @@ def test_run_prof_sdk_creates_new_env_copy(tmp_path, monkeypatch):
monkeypatch.setattr("shutil.copyfile", lambda *a, **k: None)
monkeypatch.setattr("shutil.rmtree", lambda *a, **k: None)
monkeypatch.setattr("utils.utils.console_warning", lambda *a, **k: None)
monkeypatch.setattr("builtins.open", lambda *a, **k: io.StringIO(""))
utils_mod.run_prof(
fname_str,
@@ -3030,7 +2991,7 @@ def test_run_prof_v3_sdk_and_cli_calls_trace_processing(tmp_path, monkeypatch):
Line 5 (CLI): elif "--hip-trace" in options:
process_hip_trace_output(...)
"""
fname_str = str(tmp_path / "counters.txt")
fname_str = str(tmp_path) + "/counters.txt"
Path(fname_str).touch()
fbase_str = "counters"
workload_dir_str = str(tmp_path)
@@ -3041,7 +3002,7 @@ def test_run_prof_v3_sdk_and_cli_calls_trace_processing(tmp_path, monkeypatch):
)
monkeypatch.setattr(
"utils.utils.process_rocprofv3_output",
lambda *a, **k: [str(tmp_path / "results1.csv")],
lambda *a, **k: [str(tmp_path) + "/results1.csv"],
)
hip_trace_called_with = None
@@ -3096,15 +3057,13 @@ def test_run_prof_v3_sdk_and_cli_calls_trace_processing(tmp_path, monkeypatch):
monkeypatch.setattr("shutil.copyfile", lambda *a, **k: None)
monkeypatch.setattr("shutil.rmtree", lambda *a, **k: None)
monkeypatch.setattr("builtins.open", lambda *a, **k: io.StringIO(""))
monkeypatch.setattr("utils.utils.flatten_tcc_info_across_xcds", lambda df, *a: df)
monkeypatch.setattr("utils.utils.mi_gpu_specs.get_num_xcds", lambda *a: 1)
monkeypatch.setattr("utils.mi_gpu_spec.mi_gpu_specs.get_num_xcds", lambda *a: 1)
mspec = MockMSpec()
loglevel = logging.INFO
format_rocprof_output = True
monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofiler-sdk")
monkeypatch.setattr("utils.utils.using_v3", lambda: True)
profiler_options_sdk_hip = {
"APP_CMD": "my_app",
@@ -5458,306 +5417,6 @@ def test_mibench_console_log_called(tmp_path, monkeypatch):
assert console_log_calls[0][1] == "No roofline data found. Generating..."
# =============================================================================
# TESTS FOR flatten_tcc_info_across_xcds
# =============================================================================
"""
Normal Functionality:
Basic single XCD operation
Multiple XCD channel renumbering
Complex channel index patterns
Multiple dispatch handling
Edge Cases:
Empty dataframes
Zero XCDs
Insufficient data
Large channel numbers
Column Handling:
No TCC columns
TCC-only columns
Mixed TCC/non-TCC columns
Irregular TCC naming patterns
Error Conditions:
File not found errors
Invalid input validation
Performance & Data Integrity:
Large dataset handling
Data preservation validation
Regex pattern validation
"""
def test_flatten_tcc_info_across_xcds_zero_xcds(tmp_path):
    """
    Verify that flattening with ``xcds=0`` is rejected.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Passes when a ValueError whose message matches the
        "range() arg 3 must not be zero" pattern is raised.
    """
    import utils.utils as utils_mod

    # One-row input: a kernel name plus a single TCC channel counter.
    input_csv = tmp_path / "test_zero_xcds.csv"
    pd.DataFrame(
        [["kernel1", 100]],
        columns=["Kernel_Name", "TCC_HIT[0]"],
    ).to_csv(input_csv, index=False)

    with pytest.raises(ValueError, match="range\\(\\) arg 3 must not be zero"):
        utils_mod.flatten_tcc_info_across_xcds(
            str(input_csv), xcds=0, tcc_channel_per_xcd=4
        )
def test_flatten_tcc_info_across_xcds_insufficient_data(tmp_path):
    """
    Check the failure mode when the CSV holds less data than the XCD count needs.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Passes when a one-row input processed with ``xcds=3`` raises
        a ValueError about mismatched columns.
    """
    import utils.utils as utils_mod

    # Only a single row is written, while three XCDs are requested below.
    input_csv = tmp_path / "test_insufficient.csv"
    pd.DataFrame(
        [["kernel1", 100]],
        columns=["Kernel_Name", "TCC_HIT[0]"],
    ).to_csv(input_csv, index=False)

    with pytest.raises(ValueError, match="cannot set a row with mismatched columns"):
        utils_mod.flatten_tcc_info_across_xcds(
            str(input_csv), xcds=3, tcc_channel_per_xcd=4
        )
def test_flatten_tcc_info_across_xcds_irregular_tcc_column_names(tmp_path):
    """
    Exercise flattening with unusual TCC-like column names.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Asserts the renumbered TCC columns exist and the non-channel
        column keeps its value; the test is skipped when the call raises
        the pandas "truth value of a Series is ambiguous" ValueError.
    """
    import utils.utils as utils_mod

    header = [
        "Kernel_Name",
        "TCC_HIT_SPECIAL[0]",
        "NOT_TCC_BUT_HAS_TCC",
        "TCC_MISS[0]",
    ]
    rows = [
        ["kernel1", 100, 50, 10],
        ["kernel1", 200, 60, 20],
    ]
    input_csv = tmp_path / "test_irregular.csv"
    pd.DataFrame(rows, columns=header).to_csv(input_csv, index=False)

    expected_columns = (
        "TCC_HIT_SPECIAL[0]",
        "TCC_HIT_SPECIAL[4]",
        "TCC_MISS[0]",
        "TCC_MISS[4]",
    )

    try:
        flattened = utils_mod.flatten_tcc_info_across_xcds(
            str(input_csv), xcds=2, tcc_channel_per_xcd=4
        )

        assert len(flattened) == 1
        for column in expected_columns:
            assert column in flattened.columns
        assert flattened.iloc[0]["NOT_TCC_BUT_HAS_TCC"] == 50
    except ValueError as err:
        # Any other ValueError is a genuine failure and must propagate.
        if "The truth value of a Series is ambiguous" not in str(err):
            raise
        pytest.skip(
            "Function has pandas Series ambiguity issue in boolean evaluation"
        )
def test_flatten_tcc_info_across_xcds_regex_pattern_validation(tmp_path):
    """
    Check channel-index detection for several bracketed index formats.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Asserts the expected original and offset channel columns are
        present; the test is skipped when the call raises the pandas
        "truth value of a Series is ambiguous" ValueError.
    """
    import utils.utils as utils_mod

    header = ["TCC_HIT[0]", "TCC_MISS[10]", "TCC_REQ[255]", "TCC_INVALID_NO_BRACKET"]
    rows = [
        [100, 200, 300, 400],  # XCD 0
        [500, 600, 700, 800],  # XCD 1
    ]
    input_csv = tmp_path / "test_regex.csv"
    pd.DataFrame(rows, columns=header).to_csv(input_csv, index=False)

    expected_columns = (
        "TCC_HIT[0]",
        "TCC_HIT[128]",  # 0 + 1*128
        "TCC_MISS[10]",
        "TCC_MISS[138]",  # 10 + 1*128
        "TCC_REQ[255]",
        "TCC_REQ[383]",  # 255 + 1*128
    )

    try:
        flattened = utils_mod.flatten_tcc_info_across_xcds(
            str(input_csv), xcds=2, tcc_channel_per_xcd=128
        )

        assert len(flattened) == 1
        for column in expected_columns:
            assert column in flattened.columns
        assert flattened.iloc[0]["TCC_INVALID_NO_BRACKET"] == 400
    except ValueError as err:
        # Any other ValueError is a genuine failure and must propagate.
        if "The truth value of a Series is ambiguous" not in str(err):
            raise
        pytest.skip(
            "Function has pandas Series ambiguity issue in boolean evaluation"
        )
def test_flatten_tcc_info_across_xcds_edge_case_validation(tmp_path):
    """
    Cover zero XCDs, negative XCDs and a missing input file for
    flatten_tcc_info_across_xcds.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Asserts function behavior with various edge cases.
    """
    import utils.utils as utils_mod

    input_csv = tmp_path / "test_zero_xcds.csv"
    pd.DataFrame(
        [["kernel1", 100]],
        columns=["Kernel_Name", "TCC_HIT[0]"],
    ).to_csv(input_csv, index=False)
    input_path = str(input_csv)

    # Zero XCDs must be rejected.
    with pytest.raises(ValueError):
        utils_mod.flatten_tcc_info_across_xcds(
            input_path, xcds=0, tcc_channel_per_xcd=4
        )

    # Negative XCDs: either an empty result or a ValueError is accepted.
    try:
        negative_result = utils_mod.flatten_tcc_info_across_xcds(
            input_path, xcds=-1, tcc_channel_per_xcd=4
        )
        assert len(negative_result) == 0
    except ValueError:
        pass

    # A path that does not exist must surface as FileNotFoundError.
    with pytest.raises(FileNotFoundError):
        utils_mod.flatten_tcc_info_across_xcds(
            "nonexistent.csv", xcds=2, tcc_channel_per_xcd=4
        )
def test_flatten_tcc_info_across_xcds_pandas_filter_issue(tmp_path):
    """
    Document the Series-ambiguity ValueError seen with mixed TCC/non-TCC columns.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Asserts the flattened columns when the call succeeds; the test
        is skipped when the call raises the pandas "truth value of a Series
        is ambiguous" ValueError.
    """
    import utils.utils as utils_mod

    rows = [
        ["kernel1", 100, 50],
        ["kernel1", 200, 60],
    ]
    input_csv = tmp_path / "test_pandas_issue.csv"
    pd.DataFrame(
        rows, columns=["Kernel_Name", "TCC_HIT[0]", "SQ_WAVES"]
    ).to_csv(input_csv, index=False)

    try:
        flattened = utils_mod.flatten_tcc_info_across_xcds(
            str(input_csv), xcds=2, tcc_channel_per_xcd=4
        )

        assert len(flattened) == 1
        for column in ("Kernel_Name", "TCC_HIT[0]", "TCC_HIT[4]", "SQ_WAVES"):
            assert column in flattened.columns
    except ValueError as err:
        # Any other ValueError is a genuine failure and must propagate.
        if "The truth value of a Series is ambiguous" not in str(err):
            raise
        pytest.skip(
            "Known issue: pandas .filter() with regex causes "
            "Series boolean ambiguity"
        )
def test_flatten_tcc_info_across_xcds_successful_cases_only(tmp_path):
    """
    Run the straightforward two-XCD case that is expected to succeed.

    Args:
        tmp_path (Path): Temporary directory for test files.

    Returns:
        None: Asserts a single output row whose columns carry the original
        and offset channel indices, with per-XCD TCC_HIT values preserved.
    """
    import utils.utils as utils_mod

    rows = [
        [100, 10],  # XCD 0
        [200, 20],  # XCD 1
    ]
    input_csv = tmp_path / "test_simple_success.csv"
    pd.DataFrame(rows, columns=["TCC_HIT[0]", "TCC_MISS[0]"]).to_csv(
        input_csv, index=False
    )

    flattened = utils_mod.flatten_tcc_info_across_xcds(
        str(input_csv), xcds=2, tcc_channel_per_xcd=4
    )

    assert len(flattened) == 1
    for column in ("TCC_HIT[0]", "TCC_HIT[4]", "TCC_MISS[0]", "TCC_MISS[4]"):
        assert column in flattened.columns

    first_row = flattened.iloc[0]
    assert first_row["TCC_HIT[0]"] == 100
    assert first_row["TCC_HIT[4]"] == 200
# =============================================================================
# TESTS FOR flatten_tcc_info_across_xcds
# =============================================================================
"""
Normal Functionality:
@@ -8517,173 +8176,6 @@ def test_add_counter_overwrite_existing():
updated_properties = ["P_UPDATED", "P_NEW"] # noqa
# =================================================================================
# Test extract counter info extra config input yaml
# =================================================================================
def test_extract_counter_info_returns_none_when_not_found():
    """
    extract_counter_info_extra_config_input_yaml must yield None whenever
    the requested counter is absent or the input structure is incomplete.
    """
    lookup = utils.extract_counter_info_extra_config_input_yaml

    # Structurally incomplete inputs: no top-level key, no "counters" key,
    # and an empty counters list.
    assert lookup({}, "ANY_COUNTER") is None
    assert lookup({"rocprofiler-sdk": {}}, "ANY_COUNTER") is None
    assert lookup({"rocprofiler-sdk": {"counters": []}}, "ANY_COUNTER") is None

    # Well-formed input that simply does not contain the requested name.
    populated = {
        "rocprofiler-sdk": {
            "counters": [
                {"name": "EXISTING_COUNTER_1", "value": "val1"},
                {"name": "EXISTING_COUNTER_2", "value": "val2"},
            ]
        }
    }
    assert lookup(populated, "NON_EXISTENT_COUNTER") is None

    # One entry lacks its 'name' key; the well-formed sibling must still
    # be discoverable.
    partially_malformed = {
        "rocprofiler-sdk": {
            "counters": [
                {"value": "val1"},  # No 'name' key
                {"name": "EXISTING_COUNTER_2", "value": "val2"},
            ]
        }
    }
    assert lookup(partially_malformed, "EXISTING_COUNTER_1") is None
    assert lookup(partially_malformed, "EXISTING_COUNTER_2") is not None
def test_extract_counter_info_returns_counter_when_found():
    """
    extract_counter_info_extra_config_input_yaml must hand back the matching
    counter dictionary for each counter present in the input.
    """
    first_counter = {
        "name": "MY_COUNTER_1",
        "description": "Desc 1",
        "expression": "expr1",
    }
    second_counter = {
        "name": "MY_COUNTER_2",
        "description": "Desc 2",
        "expression": "expr2",
    }
    config = {
        "rocprofiler-sdk": {
            "counters-schema-version": 1,
            "counters": [first_counter, second_counter],
        }
    }

    # Look up each counter by name, in order, and compare with its definition.
    for counter_name, expected in (
        ("MY_COUNTER_1", first_counter),
        ("MY_COUNTER_2", second_counter),
    ):
        found = utils.extract_counter_info_extra_config_input_yaml(
            config, counter_name
        )
        assert found is not None
        assert found == expected
# =============================================================================
# test using_v1 function
# =============================================================================
def test_using_v1_rocprof_set_and_ends_with_rocprof_returns_true():
    """
    With "ROCPROF" present in os.environ and its value ending in "rocprof",
    using_v1() is expected to return True.
    """
    patched_env = {"ROCPROF": "/opt/rocm/bin/rocprof", "OTHER_VAR": "value"}
    with mock.patch.dict(os.environ, patched_env):
        assert utils.using_v1() is True
def test_using_v1_rocprof_set_but_not_ends_with_rocprof_returns_false():
    """
    With "ROCPROF" present in os.environ but holding a value that does NOT
    end with "rocprof", using_v1() is expected to return False.
    """
    # Each value contains "rocprof" but carries a trailing suffix.
    for rocprof_value in ("/opt/rocm/bin/rocprofv2", "some/path/to/rocprof_tool"):
        patched_env = {"ROCPROF": rocprof_value, "OTHER_VAR": "value"}
        with mock.patch.dict(os.environ, patched_env):
            assert utils.using_v1() is False
def test_using_v1_rocprof_not_in_environ_returns_false():
    """
    With "ROCPROF" absent from os.environ, using_v1() is expected to
    return False.
    """
    # Rebuild the environment without ROCPROF; clear=True swaps it in
    # wholesale for the duration of the block.
    env_without_rocprof = {
        key: value for key, value in os.environ.items() if key != "ROCPROF"
    }
    with mock.patch.dict(os.environ, env_without_rocprof, clear=True):
        assert utils.using_v1() is False
def test_using_v1_rocprof_is_empty_string_returns_false():
    """
    With "ROCPROF" present in os.environ but set to an empty string,
    using_v1() is expected to return False.
    """
    patched_env = {"ROCPROF": "", "OTHER_VAR": "value"}
    with mock.patch.dict(os.environ, patched_env):
        assert utils.using_v1() is False
# =============================================================================
# additional test detect_rocprof console error
# =============================================================================
@@ -8732,27 +8224,6 @@ class MockArgs: # noqa
return self.__dict__ == other.__dict__
def test_store_app_cmd_sets_global_rocprof_args():
    """
    Check that store_app_cmd leaves the very object it was given in the
    module-level ``utils.rocprof_args`` (identity, not just equality).
    """
    # Reset any existing value so the identity check below cannot be
    # satisfied by stale state.
    if hasattr(utils, "rocprof_args"):
        utils.rocprof_args = None

    expected_args = MockArgs(
        rocprofiler_sdk_library_path="/path/to/sdk",
        input_file="input.txt",
        some_other_option=True,
    )
    utils.store_app_cmd(expected_args)

    assert utils.rocprof_args is expected_args, (
        "Global rocprof_args should be the same object as the passed args"
    )
# =============================================================================
# additional tests for v3_counter_csv_to_v2_csv function
# =============================================================================
@@ -9112,155 +8583,6 @@ def test_pc_sampling_prof_empty_appcmd(
mock_console_error.assert_not_called()
# =============================================================================
# test replace_timestamps function
# =============================================================================
def create_dummy_csv(filepath, data_dict):
    """Write ``data_dict`` to ``filepath`` as a CSV file without the index column."""
    pd.DataFrame(data_dict).to_csv(filepath, index=False)
@mock.patch("utils.utils.console_warning")
def test_replace_timestamps_no_timestamps_csv_returns_early(
    mock_console_warning, tmp_path
):
    """
    Edge Case: the workload directory contains no timestamps.csv.
    replace_timestamps is expected to return without emitting a warning.
    Covers: if not path(workload_dir, "timestamps.csv").is_file(): return
    """
    # tmp_path starts out empty, so timestamps.csv is guaranteed missing.
    utils.replace_timestamps(str(tmp_path))

    mock_console_warning.assert_not_called()
@mock.patch("utils.utils.console_warning")
@mock.patch("glob.glob")
def test_replace_timestamps_timestamps_csv_missing_columns_warns(
    mock_glob, mock_console_warning, tmp_path
):
    """
    Edge Case: timestamps.csv is present but lacks
    'Start_Timestamp' / 'End_Timestamp'.
    replace_timestamps is expected to warn and not look for other CSVs.
    Covers: else: console_warning(...)
    """
    workload_dir = str(tmp_path)

    # Write a timestamps.csv that has neither required timestamp column.
    create_dummy_csv(
        os.path.join(workload_dir, "timestamps.csv"),
        {"Some_Other_Column": [123]},
    )

    utils.replace_timestamps(workload_dir)

    # Exactly one warning, with this exact text.
    mock_console_warning.assert_called_once_with(
        "Incomplete profiling data detected. Unable to update timestamps.\n"
    )
    # glob must not have been consulted at all.
    mock_glob.assert_not_called()
@mock.patch("utils.utils.console_warning")
@mock.patch("glob.glob")
def test_replace_timestamps_updates_other_csvs_skips_sysinfo(
    mock_glob, mock_console_warning, tmp_path
):
    """
    Edge Case: a valid timestamps.csv next to other CSVs, one of which is
    sysinfo.csv. Only the non-sysinfo file should receive new timestamps.
    Covers: for fname in glob.glob(...): if path(fname).name != "sysinfo.csv": ...
    """
    workload_dir = str(tmp_path)
    timestamps_csv = os.path.join(workload_dir, "timestamps.csv")
    data_csv = os.path.join(workload_dir, "data.csv")
    sysinfo_csv = os.path.join(workload_dir, "sysinfo.csv")

    expected_timestamps = {
        "Start_Timestamp": [1000, 2000],
        "End_Timestamp": [1500, 2500],
    }

    create_dummy_csv(timestamps_csv, dict(expected_timestamps))
    create_dummy_csv(
        data_csv,
        {"Kernel_Name": ["A", "B"], "Start_Timestamp": [1, 2], "End_Timestamp": [3, 4]},
    )
    create_dummy_csv(
        sysinfo_csv,
        {"Info": ["CPU", "MEM"], "Start_Timestamp": [5, 6], "End_Timestamp": [7, 8]},
    )

    # glob is mocked, so the function sees exactly these three files.
    mock_glob.return_value = [data_csv, sysinfo_csv, timestamps_csv]

    utils.replace_timestamps(workload_dir)

    mock_console_warning.assert_not_called()

    # data.csv must now carry the timestamps taken from timestamps.csv.
    data_after = pd.read_csv(data_csv)
    for column in ("Start_Timestamp", "End_Timestamp"):
        pd.testing.assert_series_equal(
            data_after[column],
            pd.Series(expected_timestamps[column], name=column),
        )

    # sysinfo.csv must still hold its original timestamps.
    sysinfo_after = pd.read_csv(sysinfo_csv)
    assert list(sysinfo_after["Start_Timestamp"]) == [5, 6]
    assert list(sysinfo_after["End_Timestamp"]) == [7, 8]
@mock.patch("utils.utils.console_warning")
@mock.patch("glob.glob")
def test_replace_timestamps_no_other_csvs_to_update(
    mock_glob, mock_console_warning, tmp_path
):
    """
    Edge Case: a valid timestamps.csv with sysinfo.csv as the only other CSV.
    No warning is expected and sysinfo.csv must come out unchanged.
    Covers: The for loop not iterating if glob returns empty or only sysinfo.
    """
    workload_dir = str(tmp_path)
    timestamps_csv = os.path.join(workload_dir, "timestamps.csv")
    sysinfo_csv = os.path.join(workload_dir, "sysinfo.csv")

    create_dummy_csv(
        timestamps_csv,
        {"Start_Timestamp": [100], "End_Timestamp": [200]},
    )
    create_dummy_csv(
        sysinfo_csv,
        {"Info": ["CPU"], "Start_Timestamp": [5], "End_Timestamp": [7]},
    )

    # glob is mocked to report only timestamps.csv and sysinfo.csv.
    mock_glob.return_value = [timestamps_csv, sysinfo_csv]

    utils.replace_timestamps(workload_dir)

    mock_console_warning.assert_not_called()

    # sysinfo.csv must still hold its original timestamps.
    sysinfo_after = pd.read_csv(sysinfo_csv)
    assert list(sysinfo_after["Start_Timestamp"]) == [5]
    assert list(sysinfo_after["End_Timestamp"]) == [7]
def test_set_parser():
from utils.utils import parse_sets_yaml