2023-01-24 18:53:23 -06:00
#!/usr/bin/env python3
2025-04-15 18:39:53 -04:00
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
2023-01-24 18:53:23 -06:00
import os
import re
import sys
import json
2023-02-08 11:54:45 -06:00
import math
2023-01-24 18:53:23 -06:00
import argparse
from collections import OrderedDict
2023-04-13 02:14:35 -05:00
# Multiplier applied to every value returned by stddev(); main() overwrites it
# with the value of the --stddev command-line option.
num_stddev = 1.0
2023-01-24 18:53:23 -06:00
def mean(_data):
    """Return the arithmetic mean of ``_data`` as a float (0.0 when empty)."""
    count = len(_data)
    if count <= 0:
        return 0.0
    return sum(_data) / float(count)
def stddev(_data):
    """Return the population standard deviation of ``_data`` times ``num_stddev``.

    The variance is divided by ``len(_data)`` (not ``len(_data) - 1``) and the
    square root is scaled by the module-level ``num_stddev`` multiplier.
    An empty input yields 0.0.
    """
    count = len(_data)
    if not count:
        return 0.0
    center = mean(_data)
    squared_error = sum((value - center) ** 2 for value in _data)
    return float(num_stddev) * math.sqrt(squared_error / float(count))
2023-01-24 18:53:23 -06:00
2023-02-27 12:09:03 -06:00
def simpsons_rule(a, b, fa, fb):
    """Integrate over ``[a, b]`` with Simpson's rule, given only the end values.

    https://en.m.wikipedia.org/wiki/Simpson%27s_rule

    Only the end-point values are known, so the midpoint value is linearly
    interpolated between ``fa`` and ``fb``.

    Args:
        a: Lower bound of the interval.
        b: Upper bound of the interval.
        fa: Function value at ``a``.
        fb: Function value at ``b``.

    Returns:
        The approximated integral; 0.0 for a zero-width interval.
    """
    width = b - a
    if width == 0:
        # A zero-width interval encloses no area; returning early also avoids
        # dividing by zero when computing the slope.
        return 0.0
    slope = (fb - fa) / width
    # f(x) at midpoint
    fm = fa + (0.5 * width * slope)
    factor = width / 6.0
    return factor * (fa + (4.0 * fm) + fb)
2023-01-24 18:53:23 -06:00
class validation(object):
    """An expected program speedup for one experiment/progress-point/virtual-speedup."""

    def __init__(self, _exp_re, _pp_re, _virt, _expected, _tolerance):
        """Compile the two regex filters and convert the numeric fields.

        Args:
            _exp_re: Regex matched against the experiment name.
            _pp_re: Regex matched against the progress-point name.
            _virt: Virtual speedup this validation applies to (converted to int).
            _expected: Expected program speedup (converted to float).
            _tolerance: Allowed absolute deviation (converted to float).
        """
        self.experiment_filter = re.compile(_exp_re)
        self.progress_pt_filter = re.compile(_pp_re)
        self.virtual_speedup = int(_virt)
        self.program_speedup = float(_expected)
        self.tolerance = float(_tolerance)

    def validate(
        self,
        _exp_name,
        _pp_name,
        _virt_speedup,
        _prog_speedup,
        _prog_speedup_stddev,
        _base_speedup_stddev,
        _ci=False,
    ):
        """Check a measured program speedup against the expected value.

        Returns:
            ``None`` when this validation does not apply (either regex fails
            to match or the virtual speedup differs); otherwise whether
            ``_prog_speedup`` lies within the expected value plus or minus
            the (possibly widened) tolerance.
        """
        applies = (
            self.experiment_filter.search(_exp_name)
            and self.progress_pt_filter.search(_pp_name)
            and _virt_speedup == self.virtual_speedup
        )
        if not applies:
            return None

        allowed = self.tolerance
        why = "[unspecified reason]"
        if _ci is True:
            # On GitHub Action servers, you typically only get two CPUs, which
            # may be one core with two hyperthreads. The hyperthreading can
            # cause the speedup potential to drop. Furthermore, these are
            # typically shared resources so the runtime may vary significantly.
            # Thus, always account for stddev to prevent failures due to these
            # causes.
            allowed += max([_base_speedup_stddev, _prog_speedup_stddev])
            why = "results obtained on a shared CI system... potentially artificially deflating speedup predictions"
        elif _base_speedup_stddev > self.tolerance:
            allowed += math.sqrt(_base_speedup_stddev)
            why = f"large standard deviation of the baseline ({_base_speedup_stddev:.3f})"
        elif _prog_speedup_stddev > 1.0:
            allowed += math.sqrt(_prog_speedup_stddev)
            why = f"large standard deviation of the program speedup ({_prog_speedup_stddev:.3f})"

        if allowed > self.tolerance:
            # Tell the user that a wider window than requested is being used.
            sys.stderr.write(
                f"[{_exp_name}][{_pp_name}][{_virt_speedup}] Tolerance increased: {why} ({self.tolerance:.3f} increased to {allowed:.3f})...\n"
            )

        lower_bound = self.program_speedup - allowed
        upper_bound = self.program_speedup + allowed
        return lower_bound <= _prog_speedup <= upper_bound
2023-01-24 18:53:23 -06:00
2023-02-09 09:47:48 -06:00
class throughput_point(object):
    """Samples recorded for one progress point at a single virtual speedup.

    Each sample pairs a progress-point ``delta`` with the ``duration`` of the
    experiment it was recorded in. Instances compare and order by ``speedup``.
    """

    def __init__(self, _speedup):
        self.speedup = _speedup
        self.delta = []
        self.duration = []

    def __iadd__(self, _data):
        """Append one ``[delta, duration]`` sample (both converted to float).

        Returns ``self`` so that ``point += sample`` keeps ``point`` bound to
        this object instead of rebinding it to ``None``.
        """
        self.delta.append(float(_data[0]))
        self.duration.append(float(_data[1]))
        return self

    def __len__(self):
        """Number of samples recorded."""
        return len(self.duration)

    def __eq__(self, rhs):
        # Anything without a speedup (e.g. None) is simply "not equal".
        if not hasattr(rhs, "speedup"):
            return NotImplemented
        return self.speedup == rhs.speedup

    def __ne__(self, rhs):
        return not self == rhs

    # Original (misspelled) name kept as an alias for explicit callers.
    __neq__ = __ne__

    def __lt__(self, rhs):
        return self.speedup < rhs.speedup

    def get_data(self):
        """Return ``duration / delta`` for every sample."""
        return [x / y for x, y in zip(self.duration, self.delta)]

    def mean(self):
        """Return total duration divided by total delta."""
        return sum(self.duration) / sum(self.delta)
class latency_point(object):
    """Samples recorded for one latency progress point at a single virtual speedup.

    Each sample holds an arrival count, a departure count and the duration of
    the experiment it was recorded in. Departures are stored but not used by
    any computation in this class. Instances compare and order by ``speedup``.
    """

    def __init__(self, _speedup):
        self.speedup = _speedup
        self.arrivals = []
        self.departures = []
        self.duration = []

    def __iadd__(self, _data):
        """Append one ``[arrival, departure, duration]`` sample (as floats).

        Returns ``self`` so that ``point += sample`` keeps ``point`` bound to
        this object instead of rebinding it to ``None``.
        """
        self.arrivals.append(float(_data[0]))
        self.departures.append(float(_data[1]))
        self.duration.append(float(_data[2]))
        return self

    def __len__(self):
        """Number of samples recorded."""
        return len(self.duration)

    def __eq__(self, rhs):
        # Anything without a speedup (e.g. None) is simply "not equal".
        if not hasattr(rhs, "speedup"):
            return NotImplemented
        return self.speedup == rhs.speedup

    def __ne__(self, rhs):
        return not self == rhs

    # Original (misspelled) name kept as an alias for explicit callers.
    __neq__ = __ne__

    def __lt__(self, rhs):
        return self.speedup < rhs.speedup

    def get_data(self):
        """Return ``duration / arrivals`` for every sample."""
        return [y / x for x, y in zip(self.arrivals, self.duration)]

    def get_difference(self):
        """Return every sample's duration as a fraction of the total duration."""
        _duration = sum(self.duration)
        return [x / _duration for x in self.duration]

    def mean(self):
        """Return the summed duration fractions divided by the arrival rate.

        The arrival rate is total arrivals divided by total duration.
        """
        rate = sum(self.arrivals) / sum(self.duration)
        return sum(self.get_difference()) / rate
2023-01-24 18:53:23 -06:00
class line_speedup(object):
    """Program speedup of one experiment at one virtual speedup versus a baseline.

    ``data`` and ``base`` are point objects providing ``speedup``, ``mean()``
    and ``get_data()``; when either is ``None`` every computed value is 0.0.
    """

    def __init__(self, _name="", _prog="", _exp_data=None, _exp_base=None):
        self.name = _name
        self.prog = _prog
        self.data = _exp_data
        self.base = _exp_base

    def _is_incomplete(self):
        """True when either the experiment data or the baseline is missing."""
        return self.data is None or self.base is None

    def virtual_speedup(self):
        """Return the virtual speedup of the experiment data (0.0 if incomplete)."""
        if self._is_incomplete():
            return 0.0
        return self.data.speedup

    def compute_speedup(self):
        """Return the percent reduction of the data mean relative to the baseline mean."""
        if self._is_incomplete():
            return 0.0
        _base = self.base.mean()
        return ((_base - self.data.mean()) / _base) * 100

    def compute_speedup_stddev(self):
        """Return ``stddev`` of the per-sample percent speedups versus the baseline mean."""
        if self._is_incomplete():
            return 0.0
        _base = self.base.mean()
        return stddev([((_base - ditr) / _base) * 100 for ditr in self.data.get_data()])

    def get_name(self):
        """Return ``name`` with each ':'-separated part that is an existing file
        replaced by its basename."""
        return ":".join(
            [
                os.path.basename(x) if os.path.isfile(x) else x
                for x in self.name.split(":")
            ]
        )

    def __str__(self):
        if self._is_incomplete():
            return f"{self.name}"
        _line_speedup = self.compute_speedup()
        # Already scaled by the module-level stddev multiplier.
        _line_stddev = self.compute_speedup_stddev()
        _name = self.get_name()
        return f"[{_name}][{self.prog}][{self.data.speedup:3}] speedup: {_line_speedup:6.1f} +/- {_line_stddev:6.2f} %"

    def __eq__(self, rhs):
        return (
            self.name == rhs.name
            and self.prog == rhs.prog
            and self.data == rhs.data
            and self.base == rhs.base
        )

    def __ne__(self, rhs):
        return not self == rhs

    # Original (misspelled) name kept as an alias for explicit callers.
    __neq__ = __ne__

    def __lt__(self, rhs):
        """Order by name, then progress point, then data, then baseline."""
        if self.name != rhs.name:
            return self.name < rhs.name
        elif self.prog != rhs.prog:
            return self.prog < rhs.prog
        elif self.data != rhs.data:
            return self.data < rhs.data
        elif self.base != rhs.base:
            return self.base < rhs.base
        return False
class experiment_progress(object):
    """A list of speedup entries that is reported and ranked as one group."""

    def __init__(self, _data):
        self.data = _data

    def get_impact(self):
        """Return ``[sum, mean, stddev]`` of the areas between adjacent entries.

        Each area is ``simpsons_rule`` applied to the virtual speedups (x) and
        computed speedups (y) of two neighbouring entries in ``data``.
        """
        computed = [float(entry.compute_speedup()) for entry in self.data]
        virtual = [float(entry.virtual_speedup()) for entry in self.data]
        areas = [
            simpsons_rule(v_lo, v_hi, c_lo, c_hi)
            for v_lo, v_hi, c_lo, c_hi in zip(
                virtual, virtual[1:], computed, computed[1:]
            )
        ]
        return [sum(areas), mean(areas), stddev(areas)]

    def __len__(self):
        return len(self.data)

    def __str__(self):
        """One line per entry followed by the summed and averaged impact."""
        total, average, spread = self.get_impact()
        first = self.data[0]
        label = first.get_name()
        progress_point = first.prog
        lines = [f"{entry}" for entry in self.data]
        lines.append(f"[{label}][{progress_point}][sum] impact: {total:6.1f}")
        lines.append(
            f"[{label}][{progress_point}][avg] impact: {average:6.1f} +/- {spread:6.2f}"
        )
        return "\n".join(lines)

    def __lt__(self, rhs):
        # Sorts this object's entries in place before comparing summed impact.
        self.data.sort()
        mine = self.get_impact()[0]
        theirs = rhs.get_impact()[0]
        return mine < theirs
2023-02-27 12:09:03 -06:00
def process_samples(data, _data):
    """Accumulate sample counts from one parsed input into ``data``.

    For every sample under ``_data["rocprofsys"]["causal"]["records"]`` the
    sample's count is added to the entry keyed by its ``dfunc`` value and to
    one ``"<file>:<line>"`` entry per item in its ``dwarf_info`` list.

    Returns ``data`` (updated in place); a falsy ``_data`` leaves it untouched.
    """
    if not _data:
        return data

    for record in _data["rocprofsys"]["causal"]["records"]:
        for sample in record["samples"]:
            info = sample["info"]
            hits = sample["count"]
            function = info["dfunc"]
            data[function] = data.get(function, 0) + hits
            for entry in info["dwarf_info"]:
                location = f"{entry['file']}:{entry['line']}"
                data[location] = data.get(location, 0) + hits
    return data
2023-01-24 18:53:23 -06:00
def process_data(data, _data, args):
    """Collect progress-point measurements from one parsed input into ``data``.

    ``data`` is a nested mapping ``selection -> progress point -> virtual
    speedup -> point object`` and is updated in place.

    Args:
        data: Accumulator from previous calls (``{}`` for the first input).
        _data: Parsed JSON; experiments are read from
            ``_data["rocprofsys"]["causal"]["records"]``. Falsy input is ignored.
        args: Object with ``experiments`` and ``progress_points`` regex strings;
            selections and progress points not matching them are skipped.

    Returns:
        ``data``.
    """

    def find_or_insert(_data, _value, _type):
        # Create the point object for this virtual speedup on first use.
        if _value not in _data:
            if _type == "throughput":
                _data[_value] = throughput_point(_value)
            elif _type == "latency":
                _data[_value] = latency_point(_value)
        return _data[_value]

    if not _data:
        return data

    _selection_filter = re.compile(args.experiments)
    _progresspt_filter = re.compile(args.progress_points)

    for record in _data["rocprofsys"]["causal"]["records"]:
        for exp in record["experiments"]:
            _speedup = exp["virtual_speedup"]
            _duration = exp["duration"]
            _info = exp["selection"]["info"]
            _file = _info["file"]
            _line = _info["line"]
            _func = _info["dfunc"]
            _sym_addr = exp["selection"]["symbol_address"]
            # A zero symbol address is keyed by "<file>:<line>", anything else
            # by the function name.
            _selected = ":".join([_file, f"{_line}"]) if _sym_addr == 0 else _func
            if not _selection_filter.search(_selected):
                continue
            if _selected not in data:
                data[_selected] = {}
            for pts in exp["progress_points"]:
                _name = pts["name"]
                if not _progresspt_filter.search(_name):
                    continue
                if _name not in data[_selected]:
                    data[_selected][_name] = {}
                _points = data[_selected][_name]
                if "delta" in pts:
                    _delt = pts["delta"]
                    if _delt > 0:
                        itr = find_or_insert(_points, _speedup, "throughput")
                        itr += [_delt, _duration]
                elif "arrival" in pts and pts["arrival"] > 0:
                    itr = find_or_insert(_points, _speedup, "latency")
                    itr += [pts["arrival"], pts["departure"], _duration]
                else:
                    # "laps" is recorded as a two-value (count, duration)
                    # sample, i.e. a throughput point. A missing key (e.g. a
                    # latency point with no arrivals) counts as no data.
                    _delt = pts.get("laps", 0)
                    if _delt > 0:
                        itr = find_or_insert(_points, _speedup, "throughput")
                        itr += [_delt, _duration]
    return data
def compute_speedups(_data, args):
    """Build the sorted list of ``experiment_progress`` groups to report.

    Args:
        _data: Nested mapping ``selection -> progress point -> virtual speedup
            -> point object``, as produced by ``process_data``.
        args: Object with ``speedups`` (virtual speedups to keep; empty keeps
            all) and ``min_experiments`` (minimum samples a point needs).

    Returns:
        One ``experiment_progress`` per (selection, progress point) pair that
        has a baseline at virtual speedup 0, sorted in ascending order.

    Raises:
        ValueError: If a point's ``speedup`` differs from the key it is stored under.
    """
    ret = []
    for selected, pitr in _data.items():
        for progpt, ditr in pitr.items():
            # Without a baseline (virtual speedup 0) nothing can be compared.
            if 0 not in ditr:
                continue
            _base = ditr[0]
            for speedup, itr in ditr.items():
                if len(args.speedups) > 0 and speedup not in args.speedups:
                    continue
                if speedup != itr.speedup:
                    raise ValueError(f"in {selected}: {speedup} != {itr.speedup}")
                if len(itr) >= args.min_experiments:
                    ret.append(line_speedup(selected, progpt, itr, _base))

    # Sorting puts entries sharing (name, prog) next to each other, ordered by
    # virtual speedup, so one pass is enough to group them.
    ret.sort()

    result = []
    _last_key = None
    for itr in ret:
        _key = (itr.name, itr.prog)
        if _key != _last_key:
            result.append([])
        result[-1].append(itr)
        _last_key = _key

    return sorted(experiment_progress(itr) for itr in result)
def get_validations(args):
    """Turn the flat ``args.validate`` list into ``validation`` objects.

    Every five consecutive values form one validation:
    experiment regex, progress-point regex, virtual speedup, expected speedup
    and tolerance.

    Returns:
        A list of ``validation`` objects (empty when nothing was requested).

    Raises:
        ValueError: If the number of values is not a multiple of five.
    """
    v = args.validate
    _len = len(v)
    if _len == 0:
        return []
    if _len % 5 != 0:
        # Literal braces are doubled: str.format() on the unescaped text would
        # raise KeyError instead of this ValueError.
        raise ValueError(
            "validation requires format: {{experiment regex}} {{progress-point regex}} "
            "{{virtual-speedup}} {{expected-speedup}} {{tolerance}} "
            "(i.e. 5 args per validation). There are {} extra/missing arguments".format(
                _len % 5
            )
        )
    return [validation(*v[off : off + 5]) for off in range(0, _len, 5)]
def _build_argument_parser():
    """Create the command-line parser used by ``main``."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e", "--experiments", type=str, help="Regex for experiments", default=".*"
    )
    parser.add_argument(
        "-p",
        "--progress-points",
        type=str,
        help="Regex for progress points",
        default=".*",
    )
    parser.add_argument(
        "-n", "--num-points", type=int, help="Minimum number of data points", default=5
    )
    parser.add_argument(
        "-m",
        "--min-experiments",
        type=int,
        help="Minimum number of experiments per speedup (e.g. do not display speedups when there are fewer than X experiments at this speedup)",
        default=2,
    )
    parser.add_argument(
        "-i", "--input", type=str, nargs="*", help="Input file(s)", required=True
    )
    parser.add_argument(
        "-s",
        "--speedups",
        type=int,
        help="List of speedup values to report",
        nargs="*",
        default=[],
    )
    parser.add_argument(
        "-d",
        "--stddev",
        type=float,
        help="Number of standard deviations to report",
        default=1.0,
    )
    parser.add_argument(
        "-v",
        "--validate",
        type=str,
        nargs="*",
        help="Validate speedup: {experiment regex} {progress-point regex} {virtual-speedup} {expected-speedup} {tolerance}",
        default=[],
    )
    parser.add_argument(
        "--samples",
        type=float,
        help="Report samples within this percentage of the peak (0.0, 100.0] (default: 95 percent)",
        default=95.0,
    )
    parser.add_argument(
        "--ci",
        action="store_true",
        help="{}. {}".format(
            "Accept speedup predictions when: (A) virtual speedup > 10 and (B) prediction is within the tolerance after being increased by (0.5 * stddev) and (1.0 * stddev)",
            "This is primarily used for the CI where the two threads commonly run on 1 CPU core with 2 hyperthreads (causing the speedup potential to drop)",
        ),
    )
    return parser


def main():
    """Command-line entry point.

    Parses the arguments, loads every input JSON file, prints the most
    frequent samples and the computed experiment speedups, then runs the
    requested validations.

    Raises:
        ValueError: If ``--samples`` is outside ``0.0 < x <= 100.0``.
        SystemExit: With status -1 when the number of passed validations
            differs from the number requested.
    """
    global num_stddev

    args = _build_argument_parser().parse_args()

    num_stddev = args.stddev
    num_speedups = len(args.speedups)

    percent_samples = args.samples
    # Reject anything outside (0, 100]; joining the two tests with "and"
    # could never be true, so no value was ever rejected.
    if not 0.0 < percent_samples <= 100.0:
        raise ValueError(
            f"Invalid samples value: {percent_samples}. Supported range: 0.0 < x <= 100.0"
        )
    # Convert to the fraction of the peak count a sample must reach.
    percent_samples = 1.0 - (percent_samples / 100.0)

    # Cannot require more data points than speedups that were asked for.
    if num_speedups > 0 and args.num_points > num_speedups:
        args.num_points = num_speedups

    data = {}
    samp = {}
    for inp in args.input:
        with open(inp, "r") as f:
            inp_data = json.load(f)
        data = process_data(data, inp_data, args)
        samp = process_samples(samp, inp_data)

    print("Samples:")
    # default= keeps an input without samples from raising; measuring the
    # printed count avoids math.log10() failing on a zero count.
    # NOTE(review): assumes sample counts are integers -- confirm.
    width = max((len(str(count)) for count in samp.values()), default=0)
    samp_peak = max(samp.values(), default=0)
    for name, count in sorted(samp.items(), key=lambda x: x[1], reverse=True):
        if count >= samp_peak * percent_samples:
            print(f"{count:{width}} :: {name}")

    results = compute_speedups(data, args)

    print("")
    print("Experiments:")
    for itr in results:
        if len(itr) < args.num_points:
            continue
        print("")
        # split each line, indent each line, and join again into single string
        print("{}".format("\n".join([f"    {x}" for x in f"{itr}".split("\n")])))

    sys.stdout.flush()

    validations = get_validations(args)
    expected_validations = len(validations)
    correct_validations = 0
    if expected_validations > 0:
        print(f"\nPerforming {expected_validations} validations...\n")
        for eitr in results:
            _experiment = eitr.data[0].get_name()
            _progresspt = eitr.data[0].prog
            _base_speedup_stddev = eitr.data[0].compute_speedup_stddev()
            for ditr in eitr.data:
                _virt_speedup = ditr.virtual_speedup()
                _prog_speedup = ditr.compute_speedup()
                _prog_speedup_stddev = ditr.compute_speedup_stddev()
                for vitr in validations:
                    _v = vitr.validate(
                        _experiment,
                        _progresspt,
                        _virt_speedup,
                        _prog_speedup,
                        _prog_speedup_stddev,
                        _base_speedup_stddev,
                        args.ci,
                    )
                    # None means the validation does not apply to this entry.
                    if _v is None:
                        continue
                    if _v is True:
                        correct_validations += 1
                    else:
                        sys.stderr.write(
                            f"\n[{_experiment}][{_progresspt}][{_virt_speedup}] failed validation: {_prog_speedup:8.3f} != {vitr.program_speedup} +/- {vitr.tolerance}\n\n"
                        )

    if expected_validations != correct_validations:
        sys.stderr.flush()
        sys.stderr.write(
            f"\nCausal profiling predictions not validated. Expected {expected_validations}, found {correct_validations}\n"
        )
        sys.stderr.flush()
        sys.exit(-1)
    elif expected_validations > 0:
        print(f"Causal profiling predictions validated: {expected_validations}")
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()