Improve --time-unit arg (#807)
[ROCm/rocprofiler-compute commit: 99a6e67bcc]
This commit is contained in:
@@ -89,6 +89,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
### Optimized
|
||||
|
||||
* ROCm Compute Profiler CLI has been improved to better display the GPU architecture analytics
|
||||
* Improved `--time-unit` option in analyze mode to apply time unit conversion across all analysis sections, not just kernel top stats.
|
||||
|
||||
### Resolved issues
|
||||
|
||||
|
||||
@@ -595,7 +595,7 @@ Examples:
|
||||
metavar="",
|
||||
default="ns",
|
||||
choices=["s", "ms", "us", "ns"],
|
||||
help="\t\tSpecify display time unit in kernel top stats: (DEFAULT: ns)\n\t\t s\n\t\t ms\n\t\t us\n\t\t ns",
|
||||
help="\t\tSpecify display time unit: (DEFAULT: ns)\n\t\t s\n\t\t ms\n\t\t us\n\t\t ns",
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"--decimal",
|
||||
|
||||
@@ -30,3 +30,5 @@ PROJECT_NAME = "rocprofiler-compute"
|
||||
|
||||
HIDDEN_COLUMNS = ["Tips", "coll_level"]
|
||||
HIDDEN_SECTIONS = [400, 1900, 2000]
|
||||
|
||||
TIME_UNITS = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
|
||||
|
||||
+1
-1
@@ -91,7 +91,7 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
Kernel Time:
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
|
||||
+1
-1
@@ -91,7 +91,7 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
Kernel Time:
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
|
||||
+1
-1
@@ -91,7 +91,7 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
Kernel Time:
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
|
||||
+1
-1
@@ -91,7 +91,7 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
Kernel Time:
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
|
||||
+1
-1
@@ -91,7 +91,7 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
Kernel Time:
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
|
||||
+1
-1
@@ -91,7 +91,7 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
Kernel Time:
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
|
||||
@@ -22,11 +22,8 @@
|
||||
# SOFTWARE.
|
||||
##############################################################################
|
||||
|
||||
import collections
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
||||
@@ -55,8 +52,6 @@ top_stats_build_in_config = {
|
||||
},
|
||||
}
|
||||
|
||||
time_units = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
|
||||
|
||||
|
||||
def load_sys_info(f):
|
||||
"""
|
||||
@@ -173,11 +168,11 @@ def create_df_kernel_top_stats(
|
||||
]
|
||||
|
||||
key = "Sum" + time_unit_str
|
||||
grouped[key] = grouped[key].div(time_units[time_unit])
|
||||
grouped[key] = grouped[key].div(config.TIME_UNITS[time_unit])
|
||||
key = "Mean" + time_unit_str
|
||||
grouped[key] = grouped[key].div(time_units[time_unit])
|
||||
grouped[key] = grouped[key].div(config.TIME_UNITS[time_unit])
|
||||
key = "Median" + time_unit_str
|
||||
grouped[key] = grouped[key].div(time_units[time_unit])
|
||||
grouped[key] = grouped[key].div(config.TIME_UNITS[time_unit])
|
||||
|
||||
grouped = grouped.reset_index() # Remove special group indexing
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ from pathlib import Path
|
||||
import pandas as pd
|
||||
from tabulate import tabulate
|
||||
|
||||
from config import HIDDEN_COLUMNS, HIDDEN_SECTIONS
|
||||
import config
|
||||
from utils import mem_chart, parser
|
||||
from utils.logger import console_error, console_log, console_warning
|
||||
from utils.utils import convert_metric_id_to_panel_info
|
||||
@@ -59,6 +59,50 @@ def get_table_string(df, transpose=False, decimal=2):
|
||||
)
|
||||
|
||||
|
||||
def convert_time_columns(df, time_unit):
    """
    Convert nanosecond time values to the requested display time unit.

    A row is treated as time data when its "Unit" value contains the
    standalone token "ns" (case-insensitive), so unrelated units that merely
    contain those letters (e.g. "Instructions") are left alone. For time rows
    the "Avg", "Min" and "Max" columns that exist are divided by
    config.TIME_UNITS[time_unit] and the "Unit" value is replaced by time_unit.

    Args:
        df: DataFrame to convert; it is never modified.
        time_unit: Target unit; must be a key of config.TIME_UNITS.

    Returns:
        df itself when time_unit is unknown or df has no "Unit" column,
        otherwise a converted copy of df.
    """
    if time_unit not in config.TIME_UNITS or "Unit" not in df.columns:
        return df

    # Avoid modifying the original
    df_copy = df.copy()

    # astype(str) keeps the .str accessor usable even when the column holds
    # no strings at all (e.g. an all-NaN float column).
    time_rows = (
        df_copy["Unit"]
        .astype(str)
        .str.contains(r"\bns\b", case=False, regex=True, na=False)
    )
    if not time_rows.any():
        return df_copy

    divisor = config.TIME_UNITS[time_unit]

    for col in ("Avg", "Min", "Max"):
        if col not in df_copy.columns:
            continue

        # Non-numeric entries become NaN instead of aborting the conversion.
        numeric_values = pd.to_numeric(df_copy.loc[time_rows, col], errors="coerce")

        # Widen integer/bool columns up front so the fractional results can be
        # stored without relying on implicit upcasting during assignment.
        column = df_copy[col]
        if pd.api.types.is_numeric_dtype(column) and not pd.api.types.is_float_dtype(
            column
        ):
            df_copy[col] = column.astype("float64")

        # No blanket try/except here: a failure must surface rather than leave
        # unconverted values underneath a relabelled unit.
        df_copy.loc[time_rows, col] = numeric_values / divisor

    # Update the Unit column
    df_copy.loc[time_rows, "Unit"] = time_unit

    return df_copy
|
||||
|
||||
|
||||
def has_time_data(df):
    """
    Check if the dataframe contains time data by looking at the Unit column.

    Args:
        df: DataFrame to inspect.

    Returns:
        True when df has a "Unit" column and at least one of its values
        contains the standalone token "ns" (case-insensitive); False otherwise,
        including when the column holds no string values at all.
    """
    if "Unit" not in df.columns:
        return False
    # NOTE: "ns" / "NS" / "nS" / "Ns" are reserved for Nanosec time unit.
    # Match it as a whole token so units such as "Instructions" do not count,
    # and stringify first so a non-string column cannot break the .str accessor.
    units = df["Unit"].astype(str)
    return bool(units.str.contains(r"\bns\b", case=False, regex=True, na=False).any())
|
||||
|
||||
|
||||
def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
"""
|
||||
Show all panels with their data in plain text mode.
|
||||
@@ -77,7 +121,7 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
|
||||
for panel_id, panel in archConfigs.panel_configs.items():
|
||||
# Skip panels that don't support baseline comparison
|
||||
if len(args.path) > 1 and panel_id in HIDDEN_SECTIONS:
|
||||
if len(args.path) > 1 and panel_id in config.HIDDEN_SECTIONS:
|
||||
continue
|
||||
ss = "" # store content of all data_source from one panel
|
||||
|
||||
@@ -138,7 +182,7 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
.loc[lambda d: d["Metric"].isin(common_metrics)]
|
||||
)
|
||||
if initial_index is None:
|
||||
initial_index= runs[key].dfs[table_config["id"]].index
|
||||
initial_index = runs[key].dfs[table_config["id"]].index
|
||||
else:
|
||||
runs[key].dfs[table_config["id"]].index = initial_index
|
||||
|
||||
@@ -146,6 +190,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
base_run, base_data = next(iter(runs.items()))
|
||||
base_df = base_data.dfs[table_config["id"]]
|
||||
|
||||
if args.time_unit and has_time_data(base_df):
|
||||
base_df = convert_time_columns(base_df, args.time_unit)
|
||||
|
||||
df = pd.DataFrame(index=base_df.index)
|
||||
|
||||
for header in list(base_df.keys()):
|
||||
@@ -154,7 +201,7 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
or (args.cols and base_df.columns.get_loc(header) in args.cols)
|
||||
or (type == "raw_csv_table")
|
||||
):
|
||||
if header in HIDDEN_COLUMNS:
|
||||
if header in config.HIDDEN_COLUMNS:
|
||||
pass
|
||||
elif header not in comparable_columns:
|
||||
if (
|
||||
@@ -184,9 +231,13 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
else:
|
||||
for run, data in runs.items():
|
||||
cur_df = data.dfs[table_config["id"]]
|
||||
|
||||
if args.time_unit and has_time_data(base_df):
|
||||
cur_df = convert_time_columns(cur_df, args.time_unit)
|
||||
|
||||
if (type == "raw_csv_table") or (
|
||||
type == "metric_table"
|
||||
and (not header in HIDDEN_COLUMNS)
|
||||
and (not header in config.HIDDEN_COLUMNS)
|
||||
):
|
||||
if run != base_run:
|
||||
# calc percentage over the baseline
|
||||
|
||||
@@ -24,8 +24,7 @@
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
@@ -42,6 +41,8 @@ indirs = [
|
||||
"tests/workloads/vcopy/MI350",
|
||||
]
|
||||
|
||||
time_units = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
def test_valid_path(binary_handler_analyze_rocprof_compute):
|
||||
@@ -1155,3 +1156,256 @@ def test_update_functions_coverage():
|
||||
result = update_normUnit_string("(Prefix + $normUnit)", "per_wave")
|
||||
assert "per wave" in result.lower()
|
||||
assert result[0].isupper()
|
||||
|
||||
|
||||
@pytest.fixture
def sample_time_data():
    """Three-row metric table: a nanosecond row, a cycle row and a count row."""
    metric_ids = ["7.2.0", "7.2.1", "7.2.2"]
    metric_names = ["Kernel Time", "Kernel Time (Cycles)", "Non-Time Metric"]
    columns = {
        "Metric_ID": metric_ids,
        "Metric": metric_names,
        "Avg": [3446.64, 64499.39, 1000.0],
        "Min": [1769.25, 17269.25, 500.0],
        "Max": [12532.12, 337030.50, 2000.0],
        "Unit": ["ns", "Cycle", "Count"],
    }
    return pd.DataFrame(columns)
|
||||
|
||||
|
||||
@pytest.fixture
def original_ns_values():
    """Unconverted Avg/Min/Max values of the nanosecond row used by these tests."""
    return dict(Avg=3446.64, Min=1769.25, Max=12532.12)
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_has_time_data_detection(sample_time_data):
    """has_time_data is true only for tables whose "Unit" column has an ns row."""
    from utils.tty import has_time_data

    count_only = pd.DataFrame(
        {"Metric": ["Non-Time Metric"], "Avg": [1000.0], "Unit": ["Count"]}
    )
    unitless = pd.DataFrame({"Metric": ["Some Metric"], "Avg": [1000.0]})

    assert has_time_data(sample_time_data)
    assert not has_time_data(count_only)
    assert not has_time_data(unitless)
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_default_unit_is_nanoseconds(sample_time_data):
    """The sample table carries its time row in nanoseconds before conversion."""
    lowered_units = sample_time_data["Unit"].str.lower()
    is_time_row = lowered_units.str.contains("ns", na=False)

    assert is_time_row.any()
    assert sample_time_data.loc[0, "Unit"] == "ns"
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_time_unit_conversion_to_seconds(sample_time_data, original_ns_values):
    """Converting to "s" rescales the ns row and leaves the other units alone."""
    from utils.tty import convert_time_columns

    divisor = time_units["s"]
    result = convert_time_columns(sample_time_data, "s")

    assert result.loc[0, "Unit"] == "s"

    for stat in ("Avg", "Min", "Max"):
        expected = original_ns_values[stat] / divisor
        assert abs(result.loc[0, stat] - expected) < 1e-10

    assert result.loc[1, "Unit"] == "Cycle"
    assert result.loc[2, "Unit"] == "Count"
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_time_unit_conversion_to_milliseconds(sample_time_data, original_ns_values):
    """Converting to "ms" divides the ns row's Avg/Min/Max by the ms factor."""
    from utils.tty import convert_time_columns

    divisor = time_units["ms"]
    result = convert_time_columns(sample_time_data, "ms")

    assert result.loc[0, "Unit"] == "ms"

    for stat in ("Avg", "Min", "Max"):
        expected = original_ns_values[stat] / divisor
        assert abs(result.loc[0, stat] - expected) < 1e-6
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_time_unit_conversion_to_microseconds(sample_time_data, original_ns_values):
    """Converting to "us" divides the ns row's Avg/Min/Max by the us factor."""
    from utils.tty import convert_time_columns

    divisor = time_units["us"]
    result = convert_time_columns(sample_time_data, "us")

    assert result.loc[0, "Unit"] == "us"

    for stat in ("Avg", "Min", "Max"):
        expected = original_ns_values[stat] / divisor
        assert abs(result.loc[0, stat] - expected) < 1e-3
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_time_unit_conversion_to_nanoseconds(sample_time_data, original_ns_values):
    """Converting to "ns" keeps both the unit label and the values as they were."""
    from utils.tty import convert_time_columns

    result = convert_time_columns(sample_time_data, "ns")

    assert result.loc[0, "Unit"] == "ns"

    for stat in ("Avg", "Min", "Max"):
        assert abs(result.loc[0, stat] - original_ns_values[stat]) < 1e-10
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_non_time_rows_unchanged(sample_time_data):
    """Rows whose unit is not nanoseconds keep their unit and every value."""
    from utils.tty import convert_time_columns

    converted_df = convert_time_columns(sample_time_data, "ms")

    # Check Min and Max as well as Avg, so a conversion that leaks into only
    # some value columns of a non-time row is still caught.
    expected_rows = {
        1: {"Unit": "Cycle", "Avg": 64499.39, "Min": 17269.25, "Max": 337030.50},
        2: {"Unit": "Count", "Avg": 1000.0, "Min": 500.0, "Max": 2000.0},
    }
    for row, expected in expected_rows.items():
        for col, value in expected.items():
            assert converted_df.loc[row, col] == value
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_invalid_time_unit_handling(sample_time_data):
    """An unknown time unit yields a frame equal to the input."""
    from utils.tty import convert_time_columns

    snapshot = sample_time_data.copy()
    result = convert_time_columns(sample_time_data, "invalid_unit")

    pd.testing.assert_frame_equal(result, snapshot)
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_missing_unit_column():
    """A frame without a "Unit" column comes back equal to the input."""
    from utils.tty import convert_time_columns

    unitless = pd.DataFrame({"Metric": ["Test Metric"], "Avg": [1000.0]})
    result = convert_time_columns(unitless, "ms")

    pd.testing.assert_frame_equal(result, unitless)
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_conversion_with_missing_columns(sample_time_data, original_ns_values):
    """Conversion still works when only some of Avg/Min/Max are present."""
    from utils.tty import convert_time_columns

    kept_columns = ["Metric_ID", "Metric", "Avg", "Unit"]
    partial = sample_time_data[kept_columns].copy()

    result = convert_time_columns(partial, "ms")
    expected_avg = original_ns_values["Avg"] / time_units["ms"]

    assert result.loc[0, "Unit"] == "ms"
    assert abs(result.loc[0, "Avg"] - expected_avg) < 1e-6
|
||||
|
||||
|
||||
@pytest.mark.time_unit_conversion
def test_mathematical_correctness_all_units(sample_time_data, original_ns_values):
    """Each supported unit divides the ns values by its nanosecond factor."""
    from utils.tty import convert_time_columns

    # Nanoseconds contained in one of each target unit.
    ns_per_unit = {
        "s": 10**9,
        "ms": 10**6,
        "us": 10**3,
        "ns": 1,
    }

    for target_unit, divisor in ns_per_unit.items():
        result = convert_time_columns(sample_time_data, target_unit)

        for stat in ("Avg", "Min", "Max"):
            expected = original_ns_values[stat] / divisor
            assert abs(result.loc[0, stat] - expected) < 1e-10

        assert result.loc[0, "Unit"] == target_unit
|
||||
|
||||
|
||||
# Integration tests with show_all functionality
|
||||
@pytest.mark.time_unit_integration
def test_integration_conversion_flow():
    """Detect-then-convert flow on a single nanosecond row, targeting "ms"."""
    from utils.tty import convert_time_columns, has_time_data

    mock_args = Mock(time_unit="ms", decimal=2)

    # Values are nanoseconds; the comments give the expected millisecond result.
    sample_df = pd.DataFrame(
        {
            "Metric_ID": ["7.2.0"],
            "Metric": ["Kernel Time"],
            "Avg": [3446640.0],  # 3.44664 ms
            "Min": [1769250.0],  # 1.76925 ms
            "Max": [12532120.0],  # 12.53212 ms
            "Unit": ["ns"],
        }
    )

    converted_df = sample_df
    if has_time_data(sample_df):
        converted_df = convert_time_columns(sample_df, mock_args.time_unit)

    assert converted_df.loc[0, "Unit"] == "ms"

    expected_ms = {"Avg": 3.44664, "Min": 1.76925, "Max": 12.53212}
    for stat, value in expected_ms.items():
        assert abs(converted_df.loc[0, stat] - value) < 1e-5
|
||||
|
||||
|
||||
@pytest.mark.time_unit_integration
def test_show_all_with_time_unit_conversion():
    """A capitalised "Ns" unit row converts correctly to every supported unit."""
    from utils.tty import convert_time_columns

    avg_ns = 3446.64
    test_data = pd.DataFrame(
        {
            "Metric_ID": ["7.2.0"],
            "Metric": ["Kernel Time"],
            "Avg": [avg_ns],
            "Min": [1769.25],
            "Max": [12532.12],
            "Unit": ["Ns"],
        }
    )

    for time_unit in ("s", "ms", "us", "ns"):
        result = convert_time_columns(test_data, time_unit)
        expected_avg = avg_ns / time_units[time_unit]

        assert result.loc[0, "Unit"] == time_unit
        assert abs(result.loc[0, "Avg"] - expected_avg) < 1e-10
|
||||
|
||||
|
||||
@pytest.mark.time_unit_edge_cases
def test_edge_cases_and_error_handling():
    """Empty frames, NaN values and mixed-case units are handled by conversion."""
    from utils.tty import convert_time_columns

    # An empty frame has no "Unit" column and must come back empty.
    empty_df = pd.DataFrame()
    result = convert_time_columns(empty_df, "ms")
    assert result.empty

    # NaN in a time row stays NaN; the non-time row must be left untouched.
    nan_df = pd.DataFrame({"Avg": [float("nan"), 1000.0], "Unit": ["ns", "Count"]})
    result = convert_time_columns(nan_df, "ms")
    assert result.loc[0, "Unit"] == "ms"
    assert pd.isna(result.loc[0, "Avg"])
    assert result.loc[1, "Unit"] == "Count"
    assert result.loc[1, "Avg"] == 1000.0

    # The unit match is case-insensitive, so both rows are relabelled and
    # their values rescaled from ns to ms.
    mixed_case_df = pd.DataFrame({"Avg": [1000.0, 2000.0], "Unit": ["ns", "NS"]})
    result = convert_time_columns(mixed_case_df, "ms")
    assert result.loc[0, "Unit"] == "ms"
    assert result.loc[1, "Unit"] == "ms"
    assert abs(result.loc[0, "Avg"] - 0.001) < 1e-12
    assert abs(result.loc[1, "Avg"] - 0.002) < 1e-12
|
||||
|
||||
Reference in New Issue
Block a user