Improve --time-unit arg (#807)

[ROCm/rocprofiler-compute commit: 99a6e67bcc]
2025-07-24 12:15:52 -04:00
commit dcdadfd37d
@@ -89,6 +89,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
 ### Optimized

 * ROCm Compute Profiler CLI has been improved to better display the GPU architecture analytics
+* Improved `--time-unit` option in analyze mode to apply time unit conversion across all analysis sections, not just kernel top stats.

 ### Resolved issues

@@ -595,7 +595,7 @@ Examples:
        metavar="",
        default="ns",
        choices=["s", "ms", "us", "ns"],
-        help="\t\tSpecify display time unit in kernel top stats: (DEFAULT: ns)\n\t\t   s\n\t\t   ms\n\t\t   us\n\t\t   ns",
+        help="\t\tSpecify display time unit: (DEFAULT: ns)\n\t\t   s\n\t\t   ms\n\t\t   us\n\t\t   ns",
    )
    analyze_advanced_group.add_argument(
        "--decimal",
@@ -30,3 +30,5 @@ PROJECT_NAME = "rocprofiler-compute"

 HIDDEN_COLUMNS = ["Tips", "coll_level"]
 HIDDEN_SECTIONS = [400, 1900, 2000]
+
+TIME_UNITS = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
@@ -91,7 +91,7 @@ Panel Config:
          unit: Unit
          tips: Tips
        metric:
-          Kernel Time (Nanosec):
+          Kernel Time:
            avg: AVG((End_Timestamp - Start_Timestamp))
            min: MIN((End_Timestamp - Start_Timestamp))
            max: MAX((End_Timestamp - Start_Timestamp))
@@ -91,7 +91,7 @@ Panel Config:
          unit: Unit
          tips: Tips
        metric:
-          Kernel Time (Nanosec):
+          Kernel Time:
            avg: AVG((End_Timestamp - Start_Timestamp))
            min: MIN((End_Timestamp - Start_Timestamp))
            max: MAX((End_Timestamp - Start_Timestamp))
@@ -91,7 +91,7 @@ Panel Config:
          unit: Unit
          tips: Tips
        metric:
-          Kernel Time (Nanosec):
+          Kernel Time:
            avg: AVG((End_Timestamp - Start_Timestamp))
            min: MIN((End_Timestamp - Start_Timestamp))
            max: MAX((End_Timestamp - Start_Timestamp))
@@ -91,7 +91,7 @@ Panel Config:
          unit: Unit
          tips: Tips
        metric:
-          Kernel Time (Nanosec):
+          Kernel Time:
            avg: AVG((End_Timestamp - Start_Timestamp))
            min: MIN((End_Timestamp - Start_Timestamp))
            max: MAX((End_Timestamp - Start_Timestamp))
@@ -91,7 +91,7 @@ Panel Config:
          unit: Unit
          tips: Tips
        metric:
-          Kernel Time (Nanosec):
+          Kernel Time:
            avg: AVG((End_Timestamp - Start_Timestamp))
            min: MIN((End_Timestamp - Start_Timestamp))
            max: MAX((End_Timestamp - Start_Timestamp))
@@ -91,7 +91,7 @@ Panel Config:
          unit: Unit
          tips: Tips
        metric:
-          Kernel Time (Nanosec):
+          Kernel Time:
            avg: AVG((End_Timestamp - Start_Timestamp))
            min: MIN((End_Timestamp - Start_Timestamp))
            max: MAX((End_Timestamp - Start_Timestamp))
@@ -22,11 +22,8 @@
 # SOFTWARE.
 ##############################################################################el

-import collections
-import glob
 import os
 import re
-import sys
 from collections import OrderedDict
 from pathlib import Path

@@ -55,8 +52,6 @@ top_stats_build_in_config = {
    },
 }

-time_units = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
-

 def load_sys_info(f):
    """
@@ -173,11 +168,11 @@ def create_df_kernel_top_stats(
    ]

    key = "Sum" + time_unit_str
-    grouped[key] = grouped[key].div(time_units[time_unit])
+    grouped[key] = grouped[key].div(config.TIME_UNITS[time_unit])
    key = "Mean" + time_unit_str
-    grouped[key] = grouped[key].div(time_units[time_unit])
+    grouped[key] = grouped[key].div(config.TIME_UNITS[time_unit])
    key = "Median" + time_unit_str
-    grouped[key] = grouped[key].div(time_units[time_unit])
+    grouped[key] = grouped[key].div(config.TIME_UNITS[time_unit])

    grouped = grouped.reset_index()  # Remove special group indexing

@@ -28,7 +28,7 @@ from pathlib import Path
 import pandas as pd
 from tabulate import tabulate

-from config import HIDDEN_COLUMNS, HIDDEN_SECTIONS
+import config
 from utils import mem_chart, parser
 from utils.logger import console_error, console_log, console_warning
 from utils.utils import convert_metric_id_to_panel_info
@@ -59,6 +59,50 @@ def get_table_string(df, transpose=False, decimal=2):
    )


+def convert_time_columns(df, time_unit):
+    """
+    Convert time column values based on the specified time unit.
+    Uses the Unit column to identify which columns contain time data.
+    """
+    if time_unit not in config.TIME_UNITS or "Unit" not in df.columns:
+        return df
+
+    # Avoid modifying the original
+    df_copy = df.copy()
+
+    time_rows = df_copy["Unit"].str.lower().str.contains("ns", na=False)
+
+    time_value_columns = ["Avg", "Min", "Max"]
+
+    for col in time_value_columns:
+        if col in df_copy.columns:
+            mask = time_rows
+            if mask.any():
+                try:
+                    numeric_values = pd.to_numeric(
+                        df_copy.loc[mask, col], errors="coerce"
+                    )
+                    df_copy.loc[mask, col] = numeric_values / config.TIME_UNITS[time_unit]
+                except:
+                    pass
+
+    # Update the Unit column
+    if time_rows.any():
+        df_copy.loc[time_rows, "Unit"] = time_unit
+
+    return df_copy
+
+
+def has_time_data(df):
+    """
+    Check if the dataframe contains time data by looking at the Unit column.
+    """
+    if "Unit" not in df.columns:
+        return False
+    # NOTE: "ns" / "NS" / "nS" / "Ns" are reserved for Nanosec time unit
+    return df["Unit"].str.lower().str.contains("ns", na=False).any()
+
+
 def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
    """
    Show all panels with their data in plain text mode.
@@ -77,7 +121,7 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):

    for panel_id, panel in archConfigs.panel_configs.items():
        # Skip panels that don't support baseline comparison
-        if len(args.path) > 1 and panel_id in HIDDEN_SECTIONS:
+        if len(args.path) > 1 and panel_id in config.HIDDEN_SECTIONS:
            continue
        ss = ""  # store content of all data_source from one panel

@@ -138,7 +182,7 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
                            .loc[lambda d: d["Metric"].isin(common_metrics)]
                        )
                        if initial_index is None:
-                            initial_index= runs[key].dfs[table_config["id"]].index
+                            initial_index = runs[key].dfs[table_config["id"]].index
                        else:
                            runs[key].dfs[table_config["id"]].index = initial_index

@@ -146,6 +190,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
                base_run, base_data = next(iter(runs.items()))
                base_df = base_data.dfs[table_config["id"]]

+                if args.time_unit and has_time_data(base_df):
+                    base_df = convert_time_columns(base_df, args.time_unit)
+
                df = pd.DataFrame(index=base_df.index)

                for header in list(base_df.keys()):
@@ -154,7 +201,7 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
                        or (args.cols and base_df.columns.get_loc(header) in args.cols)
                        or (type == "raw_csv_table")
                    ):
-                        if header in HIDDEN_COLUMNS:
+                        if header in config.HIDDEN_COLUMNS:
                            pass
                        elif header not in comparable_columns:
                            if (
@@ -184,9 +231,13 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
                        else:
                            for run, data in runs.items():
                                cur_df = data.dfs[table_config["id"]]
+
+                                if args.time_unit and has_time_data(base_df):
+                                    cur_df = convert_time_columns(cur_df, args.time_unit)
+
                                if (type == "raw_csv_table") or (
                                    type == "metric_table"
-                                    and (not header in HIDDEN_COLUMNS)
+                                    and (not header in config.HIDDEN_COLUMNS)
                                ):
                                    if run != base_run:
                                        # calc percentage over the baseline
@@ -24,8 +24,7 @@

 import os
 import shutil
-from importlib.machinery import SourceFileLoader
-from unittest.mock import patch
+from unittest.mock import Mock, patch

 import pandas as pd
 import pytest
@@ -42,6 +41,8 @@ indirs = [
    "tests/workloads/vcopy/MI350",
 ]

+time_units = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
+

@pytest.mark.misc
 def test_valid_path(binary_handler_analyze_rocprof_compute):
@@ -1155,3 +1156,256 @@ def test_update_functions_coverage():
    result = update_normUnit_string("(Prefix + $normUnit)", "per_wave")
    assert "per wave" in result.lower()
    assert result[0].isupper()
+
+
+@pytest.fixture
+def sample_time_data():
+    return pd.DataFrame(
+        {
+            "Metric_ID": ["7.2.0", "7.2.1", "7.2.2"],
+            "Metric": [
+                "Kernel Time",
+                "Kernel Time (Cycles)",
+                "Non-Time Metric",
+            ],
+            "Avg": [3446.64, 64499.39, 1000.0],
+            "Min": [1769.25, 17269.25, 500.0],
+            "Max": [12532.12, 337030.50, 2000.0],
+            "Unit": ["ns", "Cycle", "Count"],
+        }
+    )
+
+
+@pytest.fixture
+def original_ns_values():
+    return {"Avg": 3446.64, "Min": 1769.25, "Max": 12532.12}
+
+
+@pytest.mark.time_unit_conversion
+def test_has_time_data_detection(sample_time_data):
+    from utils.tty import has_time_data
+
+    assert has_time_data(sample_time_data)
+
+    no_time_data = pd.DataFrame(
+        {"Metric": ["Non-Time Metric"], "Avg": [1000.0], "Unit": ["Count"]}
+    )
+    assert not has_time_data(no_time_data)
+
+    no_unit_column = pd.DataFrame({"Metric": ["Some Metric"], "Avg": [1000.0]})
+    assert not has_time_data(no_unit_column)
+
+
+@pytest.mark.time_unit_conversion
+def test_default_unit_is_nanoseconds(sample_time_data):
+    time_rows = sample_time_data["Unit"].str.lower().str.contains("ns", na=False)
+    assert time_rows.any()
+    assert sample_time_data.loc[0, "Unit"] == "ns"
+
+
+@pytest.mark.time_unit_conversion
+def test_time_unit_conversion_to_seconds(sample_time_data, original_ns_values):
+    from utils.tty import convert_time_columns
+
+    converted_df = convert_time_columns(sample_time_data, "s")
+
+    assert converted_df.loc[0, "Unit"] == "s"
+
+    expected_avg = original_ns_values["Avg"] / time_units["s"]
+    expected_min = original_ns_values["Min"] / time_units["s"]
+    expected_max = original_ns_values["Max"] / time_units["s"]
+
+    assert abs(converted_df.loc[0, "Avg"] - expected_avg) < 1e-10
+    assert abs(converted_df.loc[0, "Min"] - expected_min) < 1e-10
+    assert abs(converted_df.loc[0, "Max"] - expected_max) < 1e-10
+
+    assert converted_df.loc[1, "Unit"] == "Cycle"
+    assert converted_df.loc[2, "Unit"] == "Count"
+
+
+@pytest.mark.time_unit_conversion
+def test_time_unit_conversion_to_milliseconds(sample_time_data, original_ns_values):
+    from utils.tty import convert_time_columns
+
+    converted_df = convert_time_columns(sample_time_data, "ms")
+
+    assert converted_df.loc[0, "Unit"] == "ms"
+
+    expected_avg = original_ns_values["Avg"] / time_units["ms"]
+    expected_min = original_ns_values["Min"] / time_units["ms"]
+    expected_max = original_ns_values["Max"] / time_units["ms"]
+
+    assert abs(converted_df.loc[0, "Avg"] - expected_avg) < 1e-6
+    assert abs(converted_df.loc[0, "Min"] - expected_min) < 1e-6
+    assert abs(converted_df.loc[0, "Max"] - expected_max) < 1e-6
+
+
+@pytest.mark.time_unit_conversion
+def test_time_unit_conversion_to_microseconds(sample_time_data, original_ns_values):
+    from utils.tty import convert_time_columns
+
+    converted_df = convert_time_columns(sample_time_data, "us")
+
+    assert converted_df.loc[0, "Unit"] == "us"
+
+    expected_avg = original_ns_values["Avg"] / time_units["us"]
+    expected_min = original_ns_values["Min"] / time_units["us"]
+    expected_max = original_ns_values["Max"] / time_units["us"]
+
+    assert abs(converted_df.loc[0, "Avg"] - expected_avg) < 1e-3
+    assert abs(converted_df.loc[0, "Min"] - expected_min) < 1e-3
+    assert abs(converted_df.loc[0, "Max"] - expected_max) < 1e-3
+
+
+@pytest.mark.time_unit_conversion
+def test_time_unit_conversion_to_nanoseconds(sample_time_data, original_ns_values):
+    from utils.tty import convert_time_columns
+
+    converted_df = convert_time_columns(sample_time_data, "ns")
+
+    assert converted_df.loc[0, "Unit"] == "ns"
+
+    assert abs(converted_df.loc[0, "Avg"] - original_ns_values["Avg"]) < 1e-10
+    assert abs(converted_df.loc[0, "Min"] - original_ns_values["Min"]) < 1e-10
+    assert abs(converted_df.loc[0, "Max"] - original_ns_values["Max"]) < 1e-10
+
+
+@pytest.mark.time_unit_conversion
+def test_non_time_rows_unchanged(sample_time_data):
+    from utils.tty import convert_time_columns
+
+    converted_df = convert_time_columns(sample_time_data, "ms")
+
+    assert converted_df.loc[1, "Unit"] == "Cycle"
+    assert converted_df.loc[2, "Unit"] == "Count"
+    assert converted_df.loc[1, "Avg"] == 64499.39
+    assert converted_df.loc[2, "Avg"] == 1000.0
+
+
+@pytest.mark.time_unit_conversion
+def test_invalid_time_unit_handling(sample_time_data):
+    from utils.tty import convert_time_columns
+
+    original_df = sample_time_data.copy()
+    converted_df = convert_time_columns(sample_time_data, "invalid_unit")
+
+    pd.testing.assert_frame_equal(converted_df, original_df)
+
+
+@pytest.mark.time_unit_conversion
+def test_missing_unit_column():
+    from utils.tty import convert_time_columns
+
+    df_no_unit = pd.DataFrame({"Metric": ["Test Metric"], "Avg": [1000.0]})
+    converted_df = convert_time_columns(df_no_unit, "ms")
+
+    pd.testing.assert_frame_equal(converted_df, df_no_unit)
+
+
+@pytest.mark.time_unit_conversion
+def test_conversion_with_missing_columns(sample_time_data, original_ns_values):
+    from utils.tty import convert_time_columns
+
+    df_partial = sample_time_data[["Metric_ID", "Metric", "Avg", "Unit"]].copy()
+    converted_df = convert_time_columns(df_partial, "ms")
+
+    assert converted_df.loc[0, "Unit"] == "ms"
+    expected_avg = original_ns_values["Avg"] / time_units["ms"]
+    assert abs(converted_df.loc[0, "Avg"] - expected_avg) < 1e-6
+
+
+@pytest.mark.time_unit_conversion
+def test_mathematical_correctness_all_units(sample_time_data, original_ns_values):
+    from utils.tty import convert_time_columns
+
+    test_cases = [
+        ("s", 10**9),  # 1 second = 10^9 nanoseconds
+        ("ms", 10**6),  # 1 millisecond = 10^6 nanoseconds
+        ("us", 10**3),  # 1 microsecond = 10^3 nanoseconds
+        ("ns", 1),  # 1 nanosecond = 1 nanosecond
+    ]
+
+    for target_unit, divisor in test_cases:
+        converted_df = convert_time_columns(sample_time_data, target_unit)
+
+        expected_avg = original_ns_values["Avg"] / divisor
+        expected_min = original_ns_values["Min"] / divisor
+        expected_max = original_ns_values["Max"] / divisor
+
+        assert abs(converted_df.loc[0, "Avg"] - expected_avg) < 1e-10
+        assert abs(converted_df.loc[0, "Min"] - expected_min) < 1e-10
+        assert abs(converted_df.loc[0, "Max"] - expected_max) < 1e-10
+        assert converted_df.loc[0, "Unit"] == target_unit
+
+
+# Integration-style tests for the conversion helpers that show_all calls
+@pytest.mark.time_unit_integration
+def test_integration_conversion_flow():
+    from utils.tty import convert_time_columns, has_time_data
+
+    mock_args = Mock()
+    mock_args.time_unit = "ms"
+    mock_args.decimal = 2
+
+    sample_df = pd.DataFrame(
+        {
+            "Metric_ID": ["7.2.0"],
+            "Metric": ["Kernel Time"],
+            "Avg": [3446640.0],  # 3.44664 ms in nanoseconds
+            "Min": [1769250.0],  # 1.76925 ms in nanoseconds
+            "Max": [12532120.0],  # 12.53212 ms in nanoseconds
+            "Unit": ["ns"],
+        }
+    )
+
+    if has_time_data(sample_df):
+        converted_df = convert_time_columns(sample_df, mock_args.time_unit)
+    else:
+        converted_df = sample_df
+
+    assert converted_df.loc[0, "Unit"] == "ms"
+    assert abs(converted_df.loc[0, "Avg"] - 3.44664) < 1e-5
+    assert abs(converted_df.loc[0, "Min"] - 1.76925) < 1e-5
+    assert abs(converted_df.loc[0, "Max"] - 12.53212) < 1e-5
+
+
+@pytest.mark.time_unit_integration
+def test_show_all_with_time_unit_conversion():
+    from utils.tty import convert_time_columns
+
+    test_data = pd.DataFrame(
+        {
+            "Metric_ID": ["7.2.0"],
+            "Metric": ["Kernel Time"],
+            "Avg": [3446.64],
+            "Min": [1769.25],
+            "Max": [12532.12],
+            "Unit": ["Ns"],
+        }
+    )
+
+    for time_unit in ["s", "ms", "us", "ns"]:
+        converted_df = convert_time_columns(test_data, time_unit)
+
+        assert converted_df.loc[0, "Unit"] == time_unit
+
+        expected_avg = 3446.64 / time_units[time_unit]
+        assert abs(converted_df.loc[0, "Avg"] - expected_avg) < 1e-10
+
+
+@pytest.mark.time_unit_edge_cases
+def test_edge_cases_and_error_handling():
+    from utils.tty import convert_time_columns
+
+    empty_df = pd.DataFrame()
+    result = convert_time_columns(empty_df, "ms")
+    assert result.empty
+
+    nan_df = pd.DataFrame({"Avg": [float("nan"), 1000.0], "Unit": ["ns", "Count"]})
+    result = convert_time_columns(nan_df, "ms")
+    assert result.loc[0, "Unit"] == "ms"
+
+    mixed_case_df = pd.DataFrame({"Avg": [1000.0, 2000.0], "Unit": ["ns", "NS"]})
+    result = convert_time_columns(mixed_case_df, "ms")
+    assert result.loc[0, "Unit"] == "ms"
+    assert result.loc[1, "Unit"] == "ms"