From e8ec84a854adc01e767d71d2f66e8ea13b9cff7f Mon Sep 17 00:00:00 2001
From: colramos-amd
Date: Thu, 11 Jan 2024 12:55:31 -0600
Subject: [PATCH] Separate aggregation of TCC and TCC2 counters

Co-authored-by: fei.zheng
Signed-off-by: colramos-amd

[ROCm/rocprofiler-compute commit: 4ffcaa1b4a4c9a38543da4ad0da46ddac9d9dc19]
---
Review note: in the new TCC2 loop the per-channel slices are accumulated
with `+=`, so every channel's counters land on the emitted pmc line; a plain
`=` would keep only the last channel. Indentation of the hunks below was
reconstructed from the Python structure and should be checked against the
target file before applying.

 .../src/omniperf_soc/soc_base.py | 48 ++++++++++++++-----
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py b/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py
index 5e3d87cd14..7a90b4130d 100644
--- a/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py
+++ b/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py
@@ -24,7 +24,6 @@

 from abc import ABC, abstractmethod
 import logging
-import sys
 import os
 import math
 import shutil
@@ -58,6 +57,8 @@ class OmniSoC_Base():
         self.__perfmon_config = config
     def set_soc_param(self, param: dict):
         self.__soc_params = param
+    def get_perfmon_dir(self):
+        return self.__perfmon_dir
     def get_soc_param(self):
         return self.__soc_params
     def set_soc(self, soc: str):
@@ -67,6 +68,13 @@ class OmniSoC_Base():
     def get_args(self):
         return self.__args

+    @demarcate
+    def get_profiler_options(self):
+        """Fetch any SoC specific arguments required by the profiler
+        """
+        # assume no SoC specific options and return empty list by default
+        return []
+
     @demarcate
     def perfmon_filter(self, roofline_perfmon_only: bool):
         """Filter default performance counter set based on user arguments
@@ -308,8 +316,8 @@ def perfmon_emit(pmc_list, perfmon_config, workload_dir=None):
         / perfmon_config["TCC"]
     )

-    # Total number iterations to write pmc: counters line
-    niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))
+    # Total number iterations to write pmc: counters line, except TCC2
+    niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt))

     # Emit PMC counters into pmc config file
     if workload_dir:
@@ -318,7 +326,6 @@ def perfmon_emit(pmc_list, perfmon_config, workload_dir=None):
     else:
         batches = []

-    tcc2_index = 0
     for iter in range(niter):
         # Prefix
         line = "pmc: "
@@ -335,15 +342,6 @@ def perfmon_emit(pmc_list, perfmon_config, workload_dir=None):
         N = perfmon_config["TCC"]
         tcc_counters = pmc_list["TCC"][iter * N : iter * N + N]

-        if not tcc_counters:
-            # TCC per-channel counters
-            for ch in range(perfmon_config["TCC_channels"]):
-                tcc_counters += pmc_list["TCC2"][str(ch)][
-                    tcc2_index * N : tcc2_index * N + N
-                ]
-
-            tcc2_index += 1
-
         # TCC aggregated counters
         line = line + " " + " ".join(tcc_counters)
         if workload_dir:
@@ -353,6 +351,30 @@ def perfmon_emit(pmc_list, perfmon_config, workload_dir=None):
             b.remove("pmc:")
             batches.append(b)

+    # TCC2, handle TCC per channel counters separately
+    tcc2_index = 0
+    niter = math.ceil(max(tcc2_cnt))
+    for iter in range(niter):
+        # Prefix
+        line = "pmc: "
+
+        N = perfmon_config["TCC"]
+        # TCC per-channel counters
+        tcc_counters = []
+        for ch in range(perfmon_config["TCC_channels"]):
+            tcc_counters += pmc_list["TCC2"][str(ch)][tcc2_index * N : tcc2_index * N + N]
+
+        tcc2_index += 1
+
+        # TCC2 aggregated counters
+        line = line + " " + " ".join(tcc_counters)
+        if workload_dir:
+            fd.write(line + "\n")
+        else:
+            b = line.split()
+            b.remove("pmc:")
+            batches.append(b)
+
     if workload_dir:
         fd.write("\ngpu:\n")
         fd.write("range:\n")