From 61fd728fdbf3a8272535f6291f374a224cf5158c Mon Sep 17 00:00:00 2001 From: vedithal-amd Date: Tue, 23 Dec 2025 13:13:53 -0500 Subject: [PATCH] [rocprofiler-compute] Faster counter accuracy testing (#2420) * Faster counter accuracy testing * Better handle SPI_CSN_* metrics for series prior to MI350 * Use metric filtering to collect only relevant counters for comparison * Ensure all workload folders are deleted after testing is completed * Don't use clean_existing=False * Add manual test for all counter accuracy --- .../tests/test_profile_general.py | 114 ++++++++++++++++-- 1 file changed, 105 insertions(+), 9 deletions(-) diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index 9e19468a8b..ff95607af3 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -566,13 +566,16 @@ def are_deterministic_counters_equal(test_dfs, baseline_df): if not all(baseline_group_keys == keys for keys in tests_group_keys): return False + # series prior to MI350 use CSN, MI350 uses CS{0,1,2,3} deterministic_counter_patterns = list( map( re.compile, [ "SQ_INSTS_.*", "SPI_CS\\d_NUM_THREADGROUPS", + "SPI_CSN_NUM_THREADGROUPS", "SPI_CS\\d_WAVE", + "SPI_CSN_WAVE", "SQ_WAVES", ], ) @@ -2645,15 +2648,31 @@ def test_iteration_multiplexing_kernel_launch_params( def test_iteration_multiplexing_deterministic_counter_accuracy( binary_handler_profile_rocprof_compute, ): + # These metrics should cover the deterministic counters being checked + options = ["--block", "6.1.5", "6.1.6", "7.2.2", "10.1"] workload_dir = test_utils.get_output_dir(param_id="no_iter_mplx") _ = binary_handler_profile_rocprof_compute( - config, workload_dir, check_success=True, roof=False, app_name="app_laplace_eqn" + config, + workload_dir, + options, + check_success=True, + roof=False, + app_name="app_laplace_eqn", ) counters_no_multiplexing = 
test_utils.check_csv_files( workload_dir, num_devices, num_kernels )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) - options = ["--iteration-multiplexing", "kernel"] + options = [ + "--block", + "6.1.5", + "6.1.6", + "7.2.2", + "10.1", + "--iteration-multiplexing", + "kernel", + ] workload_dir = test_utils.get_output_dir(param_id="iter_mplx_kernel") _ = binary_handler_profile_rocprof_compute( config, @@ -2666,8 +2685,17 @@ def test_iteration_multiplexing_deterministic_counter_accuracy( counters_kernel = test_utils.check_csv_files( workload_dir, num_devices, num_kernels )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) - options = ["--iteration-multiplexing", "kernel_launch_params"] + options = [ + "--block", + "6.1.5", + "6.1.6", + "7.2.2", + "10.1", + "--iteration-multiplexing", + "kernel_launch_params", + ] workload_dir = test_utils.get_output_dir(param_id="iter_mplx_params") _ = binary_handler_profile_rocprof_compute( config, @@ -2680,25 +2708,90 @@ def test_iteration_multiplexing_deterministic_counter_accuracy( counters_kernel_launch_params = test_utils.check_csv_files( workload_dir, num_devices, num_kernels )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) assert are_deterministic_counters_equal( [counters_kernel, counters_kernel_launch_params], counters_no_multiplexing ) - test_utils.clean_output_dir(config["cleanup"], workload_dir) - @pytest.mark.iteration_multiplexing_stochastic def test_iteration_multiplexing_stochastic_counter_accuracy( binary_handler_profile_rocprof_compute, ): - workload_dir = test_utils.get_output_dir(param_id="no_mplx") + workload_dir = test_utils.get_output_dir(param_id="no_iter_mplx") + # These metrics should cover the L1 cache stochastic counters + options = ["--block", "16.1", "16.3"] _ = binary_handler_profile_rocprof_compute( - config, workload_dir, check_success=True, roof=False, app_name="app_laplace_eqn" + config, + workload_dir, + 
options, + check_success=True, + roof=False, + app_name="app_laplace_eqn", ) counters_no_multiplexing = test_utils.check_csv_files( workload_dir, num_devices, num_kernels )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + options = ["--block", "16.1", "16.3", "--iteration-multiplexing", "kernel"] + workload_dir = test_utils.get_output_dir(param_id="iter_mplx_kernel") + _ = binary_handler_profile_rocprof_compute( + config, + workload_dir, + options, + check_success=True, + roof=False, + app_name="app_laplace_eqn_iter", + ) + counters_kernel = test_utils.check_csv_files( + workload_dir, num_devices, num_kernels + )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + options = [ + "--block", + "16.1", + "16.3", + "--iteration-multiplexing", + "kernel_launch_params", + ] + workload_dir = test_utils.get_output_dir(param_id="iter_mplx_params") + _ = binary_handler_profile_rocprof_compute( + config, + workload_dir, + options, + check_success=True, + roof=False, + app_name="app_laplace_eqn_iter", + ) + counters_kernel_launch_params = test_utils.check_csv_files( + workload_dir, num_devices, num_kernels + )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + assert are_stochastic_counters_similar( + [counters_kernel, counters_kernel_launch_params], counters_no_multiplexing + ) + + +# Not part of automated test runs since testing all counters is expensive +def test_iteration_multiplexing_all_counter_accuracy( + binary_handler_profile_rocprof_compute, +): + workload_dir = test_utils.get_output_dir(param_id="no_iter_mplx") + _ = binary_handler_profile_rocprof_compute( + config, + workload_dir, + check_success=True, + roof=False, + app_name="app_laplace_eqn", + ) + counters_no_multiplexing = test_utils.check_csv_files( + workload_dir, num_devices, num_kernels + )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) options = ["--iteration-multiplexing", 
"kernel"] workload_dir = test_utils.get_output_dir(param_id="iter_mplx_kernel") @@ -2713,6 +2806,7 @@ def test_iteration_multiplexing_stochastic_counter_accuracy( counters_kernel = test_utils.check_csv_files( workload_dir, num_devices, num_kernels )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) options = ["--iteration-multiplexing", "kernel_launch_params"] workload_dir = test_utils.get_output_dir(param_id="iter_mplx_params") @@ -2727,9 +2821,11 @@ def test_iteration_multiplexing_stochastic_counter_accuracy( counters_kernel_launch_params = test_utils.check_csv_files( workload_dir, num_devices, num_kernels )["pmc_perf.csv"] + test_utils.clean_output_dir(config["cleanup"], workload_dir) + assert are_deterministic_counters_equal( + [counters_kernel, counters_kernel_launch_params], counters_no_multiplexing + ) assert are_stochastic_counters_similar( [counters_kernel, counters_kernel_launch_params], counters_no_multiplexing ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir)