diff --git a/src/omniperf b/src/omniperf index 71208c1777..3b38e419b1 100755 --- a/src/omniperf +++ b/src/omniperf @@ -188,7 +188,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): header += "command," header += "host_name,host_cpu,host_distro,host_kernel,host_rocmver,date," header += "gpu_soc,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize," - header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,name,numSQC,hbmBW," + header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,hbmBW," header += "ip_blocks\n" sysinfo.write(header) @@ -232,11 +232,11 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): blocks = [] hbmBW = int(mspec.cur_MCLK) / 1000 * 4096 / 8 * 2 if mspec.GPU == "gfx906": - param += ["16", "mi50", str(int(mspec.CU) // 4), str(hbmBW)] + param += ["16", "32", "mi50", str(int(mspec.CU) // 4), str(hbmBW)] elif mspec.GPU == "gfx908": - param += ["32", "mi100", "48", str(hbmBW)] + param += ["32", "32", "mi100", "48", str(hbmBW)] elif mspec.GPU == "gfx90a": - param += ["32", "mi200", "56", str(hbmBW)] + param += ["32", "32", "mi200", "56", str(hbmBW)] if not skip_roof: blocks.append("roofline") diff --git a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml index 8ad6d9d9a2..74de040b27 100644 --- a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml @@ -104,11 +104,11 @@ Panel Config: / SQ_ACTIVE_INST_ANY))) / 5) tips: LDS BW: - value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) - pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: diff --git a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml index 178849c174..4f12a2ac3d 100644 --- a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml @@ -23,7 +23,7 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) tips: Bandwidth (Pct-of-Peak): - value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: @@ -49,11 +49,11 @@ Panel Config: unit: (Instr + $normUnit) tips: Bandwidth: - avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) unit: (Bytes + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml index 8ad6d9d9a2..74de040b27 100644 --- a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml @@ -104,11 +104,11 @@ Panel Config: / SQ_ACTIVE_INST_ANY))) / 5) tips: LDS BW: - value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) - pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: diff --git a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml index 178849c174..4f12a2ac3d 100644 --- a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml @@ -23,7 +23,7 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) tips: Bandwidth (Pct-of-Peak): - value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: @@ -49,11 +49,11 @@ Panel Config: unit: (Instr + $normUnit) tips: Bandwidth: - avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) unit: (Bytes + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml index e26910abff..f10d7630f0 100644 --- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml @@ -121,11 +121,11 @@ Panel Config: / SQ_ACTIVE_INST_ANY))) / 5) tips: LDS BW: - value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) - pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: diff --git a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml index 178849c174..4f12a2ac3d 100644 --- a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml @@ -23,7 +23,7 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) tips: Bandwidth (Pct-of-Peak): - value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: @@ -49,11 +49,11 @@ Panel Config: unit: (Instr + $normUnit) tips: Bandwidth: - avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) unit: (Bytes + $normUnit) tips: diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index fa3dec1591..6cc5676a28 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -478,6 +478,7 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): ammolite__numWavesPerCU = sys_info.maxWavesPerCU # todo: check do we still need it ammolite__numSQC = sys_info.numSQC ammolite__L2Banks = sys_info.L2Banks + ammolite__LDSBanks = soc_spec.LDSBanks # todo: eventually switch this over to sys_info. its a new spec so trying not to break compatibility ammolite__freq = sys_info.cur_sclk # todo: check do we still need it ammolite__mclk = sys_info.cur_mclk ammolite__sclk = sys_info.sclk diff --git a/src/soc_params/mi100.csv b/src/soc_params/mi100.csv index fa85f5def5..c52a4e1bb8 100644 --- a/src/soc_params/mi100.csv +++ b/src/soc_params/mi100.csv @@ -1,2 +1,2 @@ -name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,Freq,mclk -mi100,8,120,480,40,30,32,1502,1200 +name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,LDSBanks,Freq,mclk +mi100,8,120,480,40,30,32,32,1502,1200 diff --git a/src/soc_params/mi200.csv b/src/soc_params/mi200.csv index e60ca2b696..bf6343fc06 100644 --- a/src/soc_params/mi200.csv +++ b/src/soc_params/mi200.csv @@ -1,2 +1,2 @@ -name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,Freq,mclk -mi200,8,110,440,32,56,32,1700,1600 +name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,LDSBanks,Freq,mclk +mi200,8,110,440,32,56,32,32,1700,1600 diff --git a/src/soc_params/mi50.csv b/src/soc_params/mi50.csv index 959985fb2a..f5e1bda0b4 100644 --- a/src/soc_params/mi50.csv +++ b/src/soc_params/mi50.csv @@ -1,2 +1,2 @@ -name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,Freq,mclk -mi50,4,60,240,40,15,16,1725,1000 +name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,LDSBanks,Freq,mclk +mi50,4,60,240,40,15,16,32,1725,1000