diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/CMakeLists.txt similarity index 71% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/CMakeLists.txt index 7c69079486..bbdb0c2001 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/CMakeLists.txt @@ -25,6 +25,6 @@ add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test ) # # Style format # -execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" ) -execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" ) -execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" ) +execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" ) +execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" ) +execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" ) diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/Readme.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/Readme.txt similarity index 78% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/Readme.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/Readme.txt index 13c729393b..c1165a7003 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/Readme.txt +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/Readme.txt @@ -6,9 +6,9 @@ Current library implementation supports only 
GFX9. The library source tree: - doc - Documantation, the API specification and the presentation - inc - Public API - - hsa_ext_amd_aql_profile.h - AMD AQL profile library public API + - hsa_ven_amd_aqlprofile.h - AMD AQL profile library public API - src - AMD AQL profile library sources - - aqlprofile - AMD AQL profile library + - core - the library sources - commandwriter - PM4 command writer originated from 'hsa-runtime/tools' - perfcounter - PM4 perfcounter manager originated from 'hsa-runtime/tools' - threadtrace - PM4 threadtrace manager originated from 'hsa-runtime/tools' @@ -19,7 +19,7 @@ The library source tree: To build the library: -$ cd .../hsa-ext-aql-profile +$ cd .../hsa-amd-aqlprofile $ mkdir build $ cd build $ cmake .. @@ -27,8 +27,9 @@ $ make To run the test: -$ cd .../hsa-ext-aql-profile/build -$ test/ctrl +$ cd .../hsa-amd-aqlprofile/build +$ export LD_LIBRARY_PATH=$PWD +$ ./test/ctrl To enable PMC profiling: @@ -37,3 +38,7 @@ $ export ROCR_ENABLE_PMC=1 To enable SQTT profiling: $ export ROCR_ENABLE_SQTT=1 + +Or to use the script: + +$ ./run.sh diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/cmake_modules/exportToolFlags.cmake b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/cmake_modules/exportToolFlags.cmake similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/cmake_modules/exportToolFlags.cmake rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/cmake_modules/exportToolFlags.cmake diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/cmake_modules/validateBldEnv.cmake b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/cmake_modules/validateBldEnv.cmake similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/cmake_modules/validateBldEnv.cmake rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/cmake_modules/validateBldEnv.cmake diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/doc/HSA_ext_profile_api.pptx 
b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/doc/HSA_ven_amd_aqlprofile_api.pptx similarity index 89% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/doc/HSA_ext_profile_api.pptx rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/doc/HSA_ven_amd_aqlprofile_api.pptx index a3df42bba0..ec356d6409 100644 Binary files a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/doc/HSA_ext_profile_api.pptx and b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/doc/HSA_ven_amd_aqlprofile_api.pptx differ diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/doc/HSA_ven_amd_aqlprofile_api_v1_2_0.docx b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/doc/HSA_ven_amd_aqlprofile_api_v1_2_0.docx new file mode 100644 index 0000000000..3a1fc1bcc3 Binary files /dev/null and b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/doc/HSA_ven_amd_aqlprofile_api_v1_2_0.docx differ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_bonaire.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_bonaire.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_bonaire.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_bonaire.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_hainan.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_hainan.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_hainan.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_hainan.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_oland.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_oland.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_oland.h rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_oland.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_pitcairn.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_pitcairn.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_pitcairn.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_pitcairn.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_tahiti.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_tahiti.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_tahiti.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_tahiti.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_tiran.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_tiran.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_tiran.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_tiran.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_verde.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_verde.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/features_verde.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/features_verde.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/gfx8_utils.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/gfx8_utils.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/gfx8_utils.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/gfx8_utils.h diff --git 
a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_enum.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_enum.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_enum.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_enum.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_mask.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_mask.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_mask.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_mask.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_offset.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_offset.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_offset.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_offset.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_pm4_it_opcodes.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_pm4_it_opcodes.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_pm4_it_opcodes.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_pm4_it_opcodes.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_pm4cmds.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_pm4cmds.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_pm4cmds.h rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_pm4cmds.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_shift.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_shift.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_shift.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_shift.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_sq_reg.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_sq_reg.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_sq_reg.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_sq_reg.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_sq_uc_reg.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_sq_uc_reg.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_sq_uc_reg.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_sq_uc_reg.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_typedef.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_typedef.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_ci_vi_merged_typedef.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_ci_vi_merged_typedef.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_pm4defs.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_pm4defs.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx8/si_pm4defs.h rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx8/si_pm4defs.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_ce_pm4_packets_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_ce_pm4_packets_vg10.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_ce_pm4_packets_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_ce_pm4_packets_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_me_pm4_packets_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_me_pm4_packets_vg10.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_me_pm4_packets_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_me_pm4_packets_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mec_aql_packets_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mec_aql_packets_vg10.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mec_aql_packets_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mec_aql_packets_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mec_cmn_structs_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mec_cmn_structs_vg10.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mec_cmn_structs_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mec_cmn_structs_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mec_pm4_packets_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mec_pm4_packets_vg10.h similarity index 100% rename from 
projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mec_pm4_packets_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mec_pm4_packets_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mes_pm4_packets_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mes_pm4_packets_vg10.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_mes_pm4_packets_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_mes_pm4_packets_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_pfp_pm4_packets_vg10.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_pfp_pm4_packets_vg10.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/f32_pfp_pm4_packets_vg10.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/f32_pfp_pm4_packets_vg10.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_enum.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_enum.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_enum.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_enum.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_mask.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_mask.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_mask.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_mask.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_offset.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_offset.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_offset.h rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_offset.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_pm4_it_opcodes.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_pm4_it_opcodes.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_pm4_it_opcodes.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_pm4_it_opcodes.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_pm4defs.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_pm4defs.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_pm4defs.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_pm4defs.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_shift.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_shift.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_shift.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_shift.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_typedef.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_typedef.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_typedef.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_typedef.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_utils.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_utils.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/gfxip/gfx9/gfx9_utils.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/gfxip/gfx9/gfx9_utils.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/CMakeLists.txt 
b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/CMakeLists.txt similarity index 81% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/CMakeLists.txt index ac980faad6..36b8928879 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/CMakeLists.txt @@ -20,10 +20,9 @@ if ( NOT DEFINED PROJ_DIR ) set ( ROOT_DIR ${PROJ_DIR}/.. ) endif () -set ( API_DIR ${ROOT_DIR}/inc ) -set ( HSA_RUNTIME_DIR ${PROJ_DIR}/../../.. ) -set ( HSA_RUNTIME_OSC_DIR ${HSA_RUNTIME_DIR}/opensrc/hsa-runtime ) -set ( CORE_UTIL_DIR ${HSA_RUNTIME_OSC_DIR}/core/util ) +set ( HSA_RUNTIME_DIR ${PROJ_DIR}/../../hsa-runtime ) +set ( API_DIR ${HSA_RUNTIME_DIR}/inc ) +set ( CORE_UTIL_DIR ${HSA_RUNTIME_DIR}/core/util ) include_directories ( ${ROOT_DIR} ) @@ -63,4 +62,9 @@ add_subdirectory ( ${PROJ_DIR}/perfcounter "${PROJECT_BINARY_DIR}/perfcounter" ) # libraries that have been built in this regard # set ( TARGET_LIB "${TARGET_NAME}${ONLY64STR}" ) -add_subdirectory ( ${PROJ_DIR}/${TARGET_NAME} "${PROJECT_BINARY_DIR}/${TARGET_NAME}" ) +add_subdirectory ( ${PROJ_DIR}/core "${PROJECT_BINARY_DIR}/core" ) + +# +# Creating the library link +# +execute_process ( COMMAND sh -xc "/bin/ln -s core/lib${TARGET_LIB}.so libhsa-amd-${TARGET_LIB}.so.1" ) diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/CMakeLists.txt similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/CMakeLists.txt diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/cmdwriter.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/cmdwriter.h similarity index 100% rename from 
projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/cmdwriter.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/cmdwriter.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx8_cmds.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx8_cmds.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx8_cmds.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx8_cmds.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx8_cmdwriter.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx8_cmdwriter.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx8_cmdwriter.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx8_cmdwriter.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx8_cmdwriter.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx8_cmdwriter.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx8_cmdwriter.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx8_cmdwriter.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx9_cmds.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx9_cmds.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx9_cmds.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx9_cmds.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx9_cmdwriter.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx9_cmdwriter.cpp similarity index 100% rename from 
projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx9_cmdwriter.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx9_cmdwriter.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx9_cmdwriter.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx9_cmdwriter.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/commandwriter/gfx9_cmdwriter.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/commandwriter/gfx9_cmdwriter.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/CMakeLists.txt similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/CMakeLists.txt diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/amd_aql_pm4_ib_packet.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/amd_aql_pm4_ib_packet.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/amd_aql_pm4_ib_packet.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/amd_aql_pm4_ib_packet.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile.cpp similarity index 54% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile.cpp index 26eadca6b5..e6abeea75f 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile.cpp @@ -1,6 +1,9 @@ -#include - #include "aql_profile.h" + +#include +#include 
+#include + #include "pm4_factory.h" #include "cmdwriter.h" // commandwriter #include "perf_counter.h" // perfcounter @@ -9,6 +12,14 @@ #include "logger.h" #define PUBLIC_API __attribute__((visibility("default"))) +#define DESTRUCTOR_API __attribute__((destructor)) +#define ERR_CHECK(cond, err, msg) \ + { \ + if (cond) { \ + ERR_LOGGING << msg; \ + return err; \ + } \ + } namespace aql_profile { @@ -31,7 +42,7 @@ class CommandBufferMgr { uint32_t align(const uint32_t& size) { return (size + align_mask) & ~align_mask; } public: - CommandBufferMgr(const profile_t* profile) + explicit CommandBufferMgr(const profile_t* profile) : buffer(profile->command_buffer), postfix_size(0), info(NULL) { info = (info_t*)setPostfix(sizeof(info_t)); } @@ -44,12 +55,16 @@ class CommandBufferMgr { postfix_size = size; buffer.size -= (delta < buffer.size) ? delta : buffer.size; } + if (buffer.size == 0) + throw aql_profile_exc_msg("CommandBufferMgr::setPostfix(): buffer size set to zero"); return (buffer.size != 0) ? 
buffer.ptr + buffer.size : NULL; } bool setPreSize(const uint32_t& size) { bool suc = (size <= buffer.size); if (suc) info->precmds_size = size; + if (!suc) + throw aql_profile_exc_msg("CommandBufferMgr::setPreSize(): size set out of the buffer"); return suc; } @@ -62,6 +77,8 @@ class CommandBufferMgr { info->postcmds_size = size - info->precmds_size; suc = ((getPostOffset() + info->postcmds_size) <= buffer.size); } + if (!suc) + throw aql_profile_exc_msg("CommandBufferMgr::checkTotalSize(): size set out of the buffer"); return suc; } @@ -80,19 +97,55 @@ class CommandBufferMgr { } }; +static inline pm4_profile::CountersMap CountersMapCreate(const profile_t* profile, + const Pm4Factory* pm4_factory) { + pm4_profile::CountersMap countersMap; + for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events; + p < profile->events + profile->event_count; ++p) { + countersMap[pm4_factory->getBlockId(p)].push_back(p->counter_id); + } + return countersMap; +} + +typedef std::vector EventsVec; +static inline EventsVec EventsVecCreate(const profile_t* profile, const Pm4Factory* pm4_factory) { + pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory); + + std::map id_map; + for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events; + p < profile->events + profile->event_count; ++p) { + id_map.insert(decltype(id_map)::value_type(pm4_factory->getBlockId(p), p)); + } + + // Iterate through the list of blocks/counters to generate correct order events vector + EventsVec eventsVec; + for (pm4_profile::CountersMap::const_iterator block_it = countersMap.begin(); + block_it != countersMap.end(); ++block_it) { + const uint32_t block_id = block_it->first; + const pm4_profile::CountersVec& counters = block_it->second; + const uint32_t counter_count = counters.size(); + + for (uint32_t ind = 0; ind < counter_count; ++ind) { + eventsVec.push_back(id_map[block_id] + ind); + } + } + + return eventsVec; +} + static inline bool is_event_match(const event_t& event1, 
const event_t& event2) { return (event1.block_name == event2.block_name) && (event1.block_index == event2.block_index) && (event1.counter_id == event2.counter_id); } -hsa_status_t default_pmcdata_callback(hsa_ext_amd_aql_profile_info_type_t info_type, - hsa_ext_amd_aql_profile_info_data_t* info_data, +hsa_status_t default_pmcdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, void* callback_data) { hsa_status_t status = HSA_STATUS_SUCCESS; - hsa_ext_amd_aql_profile_info_data_t* passed_data = - reinterpret_cast(callback_data); + hsa_ven_amd_aqlprofile_info_data_t* passed_data = + reinterpret_cast(callback_data); - if (info_type == HSA_EXT_AQL_PROFILE_INFO_PMC_DATA) { + if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) { if (is_event_match(info_data->pmc_data.event, passed_data->pmc_data.event)) { if (passed_data->sample_id == UINT32_MAX) { passed_data->pmc_data.result += info_data->pmc_data.result; @@ -112,14 +165,14 @@ struct sqtt_ctrl_t { uint32_t writePtr; }; -hsa_status_t default_sqttdata_callback(hsa_ext_amd_aql_profile_info_type_t info_type, - hsa_ext_amd_aql_profile_info_data_t* info_data, +hsa_status_t default_sqttdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, void* callback_data) { hsa_status_t status = HSA_STATUS_SUCCESS; - hsa_ext_amd_aql_profile_info_data_t* passed_data = - reinterpret_cast(callback_data); + hsa_ven_amd_aqlprofile_info_data_t* passed_data = + reinterpret_cast(callback_data); - if (info_type == HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA) { + if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) { if (info_data->sample_id == passed_data->sample_id) { passed_data->sqtt_data = info_data->sqtt_data; status = HSA_STATUS_INFO_BREAK; @@ -129,83 +182,98 @@ hsa_status_t default_sqttdata_callback(hsa_ext_amd_aql_profile_info_type_t info_ return status; } -Pm4Factory::tables_t Pm4Factory::tables; +std::mutex Logger::mutex; +Logger* 
Logger::instance = NULL; +std::mutex Pm4Factory::mutex; +Pm4Factory::instances_t Pm4Factory::instances; + +DESTRUCTOR_API void destructor() { + Logger::Destroy(); + Pm4Factory::Destroy(); +} } // aql_profile extern "C" { -// Check if event is valid for the specific GPU -PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_validate_event( - hsa_agent_t agent, const hsa_ext_amd_aql_profile_event_t* event, bool* result) { +PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_error_string(const char** str) { + *str = aql_profile::Logger::LastMessage().c_str(); return HSA_STATUS_SUCCESS; } -// Method to populate the provided AQL packet with profiling start commands -PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start( - const hsa_ext_amd_aql_profile_profile_t* profile, aql_profile::packet_t* aql_start_packet) { - aql_profile::Logger logger; +// Check if event is valid for the specific GPU +PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_validate_event( + hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t* event, bool* result) { + hsa_status_t status = HSA_STATUS_SUCCESS; + *result = false; + try { + aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent); + if (pm4_factory->getBlockInfo(event) != NULL) *result = true; + } catch (aql_profile::event_exception& e) { + INFO_LOGGING << e.what(); + } catch (std::exception& e) { + ERR_LOGGING << e.what(); + status = HSA_STATUS_ERROR; + } + + return status; +} + +// Method to populate the provided AQL packet with profiling start commands +PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start( + const hsa_ven_amd_aqlprofile_profile_t* profile, aql_profile::packet_t* aql_start_packet) { try { aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile); - if (pm4_factory == NULL) return HSA_STATUS_ERROR; - pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter(); - if (cmdWriter == NULL) return HSA_STATUS_ERROR; - pm4_profile::DefaultCmdBuf commands; aql_profile::CommandBufferMgr 
cmdBufMgr(profile); - if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR; - if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_PMC) { - pm4_profile::Pmu* pmcMgr = pm4_factory->getPmcMgr(); - if (pmcMgr == NULL) return HSA_STATUS_ERROR; - - pm4_profile::CountersMap countersMap; - for (const hsa_ext_amd_aql_profile_event_t* p = profile->events; - p < profile->events + profile->event_count; ++p) { - countersMap[pm4_factory->getBlockId(p)].push_back(p->counter_id); - } + if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) { + pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr(); // Generate start commands + const pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory); pmcMgr->begin(&commands, cmdWriter, countersMap); cmdBufMgr.setPreSize(commands.Size()); // Generate stop commands const uint32_t data_size = pmcMgr->end(&commands, cmdWriter, countersMap, profile->output_buffer.ptr); - if (data_size == 0) return HSA_STATUS_ERROR; + ERR_CHECK(data_size == 0, HSA_STATUS_ERROR, "PMC mgr end(): data size set to zero"); assert(data_size <= profile->output_buffer.size); - if (data_size > profile->output_buffer.size) return HSA_STATUS_ERROR; - - } else if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_SQTT) { + if (data_size > profile->output_buffer.size) { + ERR_LOGGING << "data size assertion failed, data_size(" << data_size << "), buffer size(" + << profile->output_buffer.size << ")"; + return HSA_STATUS_ERROR; + } + } else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) { pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr(); - if (sqttMgr == NULL) return HSA_STATUS_ERROR; pm4_profile::ThreadTraceConfig sqtt_config; sqttMgr->InitThreadTraceConfig(&sqtt_config); if (profile->parameters) { - for (const hsa_ext_amd_aql_profile_parameters_t* p = profile->parameters; + for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters; p < (profile->parameters + profile->parameter_count); ++p) { switch 
(p->parameter_name) { - case HSA_EXT_AQL_PROFILE_PARAM_COMPUTE_UNIT_TARGET: + case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET: sqtt_config.threadTraceTargetCu = p->value; break; - case HSA_EXT_AQL_PROFILE_PARAM_VM_ID_MASK: + case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK: sqtt_config.threadTraceVmIdMask = p->value; break; - case HSA_EXT_AQL_PROFILE_PARAM_MASK: + case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK: sqtt_config.threadTraceMask = p->value; break; - case HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK: + case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK: sqtt_config.threadTraceTokenMask = p->value; break; - case HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK2: + case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2: sqtt_config.threadTraceTokenMask2 = p->value; break; default: - ERR_LOGGING(logger) << "Bad SQTT parameter name (" << p->parameter_name << ")"; - return HSA_STATUS_ERROR; + ERR_LOGGING << "Bad SQTT parameter name (" << p->parameter_name << ")"; + return HSA_STATUS_ERROR_INVALID_ARGUMENT; } } } @@ -213,10 +281,9 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start( sqttMgr->setSqttDataBuff((uint8_t*)profile->output_buffer.ptr, profile->output_buffer.size); + // Control buffer registering const uint32_t status_size = sqttMgr->StatusSizeInfo(); void* status_ptr = cmdBufMgr.setPostfix(status_size); - if (status_ptr == NULL) return HSA_STATUS_ERROR; - // Control buffer registering sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr); // Generate start commands @@ -224,10 +291,12 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start( cmdBufMgr.setPreSize(commands.Size()); // Generate stop commands sqttMgr->StopSession(&commands, cmdWriter); - } else - return HSA_STATUS_ERROR; + } else { + ERR_LOGGING << "Bad profile type (" << profile->type << ")"; + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } - if (!cmdBufMgr.checkTotalSize(commands.Size())) return HSA_STATUS_ERROR; + cmdBufMgr.checkTotalSize(commands.Size()); const 
aql_profile::descriptor_t pre_descr = cmdBufMgr.getPreDescr(); const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr(); @@ -236,7 +305,7 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start( // Populate start aql packet aql_profile::populateAql(pre_descr.ptr, pre_descr.size, cmdWriter, aql_start_packet); } catch (std::exception& e) { - ERR_LOGGING(logger) << e.what(); + ERR_LOGGING << e.what(); return HSA_STATUS_ERROR; } @@ -244,25 +313,18 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start( } // Method to populate the provided AQL packet with profiling stop commands -PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_stop( - const hsa_ext_amd_aql_profile_profile_t* profile, aql_profile::packet_t* aql_stop_packet) { - aql_profile::Logger logger; - +PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_stop(const hsa_ven_amd_aqlprofile_profile_t* profile, + aql_profile::packet_t* aql_stop_packet) { try { aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile); - if (pm4_factory == NULL) return HSA_STATUS_ERROR; - pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter(); - if (cmdWriter == NULL) return HSA_STATUS_ERROR; - aql_profile::CommandBufferMgr cmdBufMgr(profile); - if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR; - const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr(); // Populate stop aql packet + const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr(); aql_profile::populateAql(post_descr.ptr, post_descr.size, cmdWriter, aql_stop_packet); } catch (std::exception& e) { - ERR_LOGGING(logger) << e.what(); + ERR_LOGGING << e.what(); return HSA_STATUS_ERROR; } @@ -271,9 +333,7 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_stop( // Legacy devices, converting of the profiling AQL packet to PM4 packet blob PUBLIC_API hsa_status_t -hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) { - aql_profile::Logger logger; - 
+hsa_ven_amd_aqlprofile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) { try { // Populate GFX8 pm4 packet blob // Adding HSA barrier acquire packet @@ -283,7 +343,7 @@ hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, // Adding HSA barrier release packet data = aql_profile::legacyAqlRelease(aql_packet, data); } catch (std::exception& e) { - ERR_LOGGING(logger) << e.what(); + ERR_LOGGING << e.what(); return HSA_STATUS_ERROR; } @@ -292,34 +352,33 @@ hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, // Method for getting the profile info PUBLIC_API hsa_status_t -hsa_ext_amd_aql_profile_get_info(const hsa_ext_amd_aql_profile_profile_t* profile, - hsa_ext_amd_aql_profile_info_type_t attribute, void* value) { +hsa_ven_amd_aqlprofile_get_info(const hsa_ven_amd_aqlprofile_profile_t* profile, + hsa_ven_amd_aqlprofile_info_type_t attribute, void* value) { hsa_status_t status = HSA_STATUS_SUCCESS; - aql_profile::Logger logger; try { switch (attribute) { - case HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE: + case HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE: *(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh break; - case HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE: + case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE: *(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh break; - case HSA_EXT_AQL_PROFILE_INFO_PMC_DATA: - reinterpret_cast(value)->pmc_data.result = 0; - status = hsa_ext_amd_aql_profile_iterate_data(profile, - aql_profile::default_pmcdata_callback, value); + case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA: + reinterpret_cast(value)->pmc_data.result = 0; + status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::default_pmcdata_callback, + value); break; - case HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA: - status = hsa_ext_amd_aql_profile_iterate_data( - profile, aql_profile::default_sqttdata_callback, value); + case 
HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA: + status = hsa_ven_amd_aqlprofile_iterate_data(profile, + aql_profile::default_sqttdata_callback, value); break; default: status = HSA_STATUS_ERROR_INVALID_ARGUMENT; - ERR_LOGGING(logger) << "Invalid attribute (" << attribute << ")"; + ERR_LOGGING << "Invalid attribute (" << attribute << ")"; } } catch (std::exception& e) { - ERR_LOGGING(logger) << e.what(); + ERR_LOGGING << e.what(); return HSA_STATUS_ERROR; } @@ -328,29 +387,26 @@ hsa_ext_amd_aql_profile_get_info(const hsa_ext_amd_aql_profile_profile_t* profil // Method for iterating the events output data PUBLIC_API hsa_status_t -hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* profile, - hsa_ext_amd_aql_profile_data_callback_t callback, void* data) { +hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile, + hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) { hsa_status_t status = HSA_STATUS_SUCCESS; - aql_profile::Logger logger; try { aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile); - if (pm4_factory == NULL) return HSA_STATUS_ERROR; - if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_PMC) { + if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) { uint32_t info_size = 0; void* info_data; uint64_t* samples = (uint64_t*)profile->output_buffer.ptr; const uint32_t sample_count = profile->output_buffer.size / sizeof(uint64_t); uint32_t sample_index = 0; - pm4_profile::Pmu* pmcMgr = pm4_factory->getPmcMgr(); - if (pmcMgr == NULL) return HSA_STATUS_ERROR; + pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr(); - for (const hsa_ext_amd_aql_profile_event_t* p = profile->events; - p < (profile->events + profile->event_count); ++p) { - const pm4_profile::GpuBlockInfo* block_info = pm4_factory->getBlockInfo(p); - if (block_info == NULL) return HSA_STATUS_ERROR; + aql_profile::EventsVec eventsVec = EventsVecCreate(profile, pm4_factory); + for 
(aql_profile::EventsVec::const_iterator it = eventsVec.begin(); it != eventsVec.end(); + ++it) { + const hsa_ven_amd_aqlprofile_event_t* p = *it; const pm4_profile::CntlMethod method = pm4_factory->getBlockInfo(p)->method; // A perfcounter data sample per ShaderEngine const uint32_t block_samples_count = (method == pm4_profile::CntlMethodBySe || @@ -359,46 +415,52 @@ hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* pr : 1; for (uint32_t i = 0; i < block_samples_count; ++i) { assert(sample_index < sample_count); - if (sample_index >= sample_count) return HSA_STATUS_ERROR; + if (sample_index >= sample_count) { + ERR_LOGGING << "Bad sample index (" << sample_index << "/" << sample_count << ")"; + return HSA_STATUS_ERROR; + } - hsa_ext_amd_aql_profile_info_data_t sample_info; + hsa_ven_amd_aqlprofile_info_data_t sample_info; sample_info.sample_id = i; sample_info.pmc_data.event = *p; sample_info.pmc_data.result = samples[sample_index]; - status = callback(HSA_EXT_AQL_PROFILE_INFO_PMC_DATA, &sample_info, data); + status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data); if (status == HSA_STATUS_INFO_BREAK) { status = HSA_STATUS_SUCCESS; break; } if (status != HSA_STATUS_SUCCESS) { - ERR_LOGGING(logger) << "PMC data callback error, sample_id(" << i << ") status(" - << status << ")"; + ERR_LOGGING << "PMC data callback error, sample_id(" << i << ") status(" << status + << ")"; break; } ++sample_index; } } - } else if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_SQTT) { + } else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) { pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr(); - if (sqttMgr == NULL) return HSA_STATUS_ERROR; - aql_profile::CommandBufferMgr cmdBufMgr(profile); - if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR; - const uint32_t status_size = sqttMgr->StatusSizeInfo(); // Control buffer was allocated as the CmdBuffer postfix partition + const uint32_t status_size = 
sqttMgr->StatusSizeInfo(); void* status_ptr = cmdBufMgr.setPostfix(status_size); - if (status_ptr == NULL) return HSA_STATUS_ERROR; // Control buffer registering sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr); // Validate SQTT status and normalize WRPTR - if (sqttMgr->Validate() == false) return HSA_STATUS_ERROR; + if (sqttMgr->Validate() == false) { + ERR_LOGGING << "SQTT data corrupted"; + return HSA_STATUS_ERROR; + } const uint32_t se_number = sqttMgr->getNumSe(); // Casting status pointer to SQTT control per ShaderEngine array aql_profile::sqtt_ctrl_t* sqtt_ctrl = (aql_profile::sqtt_ctrl_t*)status_ptr; - assert(status_size == sizeof(aql_profile::sqtt_ctrl_t) * se_number); - if (status_size != sizeof(aql_profile::sqtt_ctrl_t) * se_number) { + const uint32_t status_size_exp = sizeof(aql_profile::sqtt_ctrl_t) * se_number; + assert(status_size == status_size_exp); + if (status_size != status_size_exp) { + ERR_LOGGING << "Bad SQTT controll data structure" + << ", status_size(" << status_size << "), status_size_exp(" << status_size_exp + << "), se_number(" << se_number << ")"; return HSA_STATUS_ERROR; } // SQTT output buffer and capacity per ShaderEngine @@ -410,29 +472,29 @@ hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* pr // written by hardware. The index is incremented by size of 32 bytes. 
uint32_t sample_size = sqtt_ctrl[i].writePtr * TT_WRITE_PTR_BLK; - hsa_ext_amd_aql_profile_info_data_t sample_info; + hsa_ven_amd_aqlprofile_info_data_t sample_info; sample_info.sample_id = i; sample_info.sqtt_data.ptr = sample_ptr; sample_info.sqtt_data.size = sample_size; - status = callback(HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA, &sample_info, data); + status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA, &sample_info, data); if (status == HSA_STATUS_INFO_BREAK) { status = HSA_STATUS_SUCCESS; break; } if (status != HSA_STATUS_SUCCESS) { - ERR_LOGGING(logger) << "SQTT data callback error, sample_id(" << i << ") status(" - << status << ")"; + ERR_LOGGING << "SQTT data callback error, sample_id(" << i << ") status(" << status + << ")"; break; } sample_ptr += sample_capacity; } } else { - ERR_LOGGING(logger) << "Bad profile type (" << profile->type << ")"; - status = HSA_STATUS_ERROR; + ERR_LOGGING << "Bad profile type (" << profile->type << ")"; + status = HSA_STATUS_ERROR_INVALID_ARGUMENT; } } catch (std::exception& e) { - ERR_LOGGING(logger) << e.what(); + ERR_LOGGING << e.what(); return HSA_STATUS_ERROR; } diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile.h new file mode 100644 index 0000000000..a7e32f6ab9 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile.h @@ -0,0 +1,40 @@ +#ifndef _AQL_PROFILE_H_ +#define _AQL_PROFILE_H_ + +#include +#include + +#include "hsa_ven_amd_aqlprofile.h" +#include "aql_profile_exception.h" + +namespace pm4_profile { +class CommandWriter; +} + +namespace aql_profile { +typedef hsa_ven_amd_aqlprofile_descriptor_t descriptor_t; +typedef hsa_ven_amd_aqlprofile_profile_t profile_t; +typedef hsa_ven_amd_aqlprofile_info_type_t info_type_t; +typedef hsa_ven_amd_aqlprofile_data_callback_t data_callback_t; +typedef hsa_ext_amd_aql_pm4_packet_t packet_t; +typedef hsa_ven_amd_aqlprofile_event_t 
event_t; + +void populateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_profile::CommandWriter* cmd_writer, + packet_t* aql_packet); +void* legacyAqlAcquire(const packet_t* aql_packet, void* data); +void* legacyAqlRelease(const packet_t* aql_packet, void* data); +void* legacyPm4(const packet_t* aql_packet, void* data); + +class event_exception : public aql_profile_exc_val { + public: + event_exception(const std::string& m, const event_t& ev) : aql_profile_exc_val(m, ev) {} +}; + +static std::ostream& operator<<(std::ostream& os, const event_t& ev) { + os << "event( block(" << ev.block_name << "." << ev.block_index << "), Id(" << ev.counter_id + << "))"; + return os; +} +} // namespace aql_profile + +#endif // _AQL_PROFILE_H_ diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile_exception.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile_exception.h new file mode 100644 index 0000000000..210ab40ab6 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/aql_profile_exception.h @@ -0,0 +1,34 @@ +#ifndef _AQL_PROFILE_EXCEPTION_H_ +#define _AQL_PROFILE_EXCEPTION_H_ + +#include + +#include +#include + +namespace aql_profile { + +class aql_profile_exc_msg : public std::exception { + public: + explicit aql_profile_exc_msg(const std::string& msg) : str(msg) {} + virtual const char* what() const throw() { return str.c_str(); } + + protected: + std::string str; +}; + +template class aql_profile_exc_val : public std::exception { + public: + aql_profile_exc_val(const std::string& msg, const T& val) { + std::ostringstream oss; + oss << msg << "(" << val << ")"; + str = oss.str(); + } + virtual const char* what() const throw() { return str.c_str(); } + + protected: + std::string str; +}; +} // namespace aql_profile + +#endif // _AQL_PROFILE_EXCEPTION_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/gfx8_factory.cpp 
b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/gfx8_factory.cpp similarity index 72% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/gfx8_factory.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/gfx8_factory.cpp index a0ecbc4107..0694e085c2 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/gfx8_factory.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/gfx8_factory.cpp @@ -11,7 +11,7 @@ namespace aql_profile { // GFX9 block ID mapping table -uint32_t Gfx8Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = { +uint32_t Gfx8Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = { pm4_profile::kHsaViCounterBlockIdCb0, pm4_profile::kHsaViCounterBlockIdCpf, pm4_profile::kHsaViCounterBlockIdDb0, pm4_profile::kHsaViCounterBlockIdGrbm, pm4_profile::kHsaViCounterBlockIdGrbmSe, pm4_profile::kHsaViCounterBlockIdPaSu, @@ -30,11 +30,21 @@ uint32_t Gfx8Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = { pm4_profile::kHsaViCounterBlockIdCpc}; pm4_profile::CommandWriter* Gfx8Factory::getCommandWriter() { - return new pm4_profile::gfx8::Gfx8CmdWriter(false, true); + auto p = new pm4_profile::gfx8::Gfx8CmdWriter(false, true); + if (p == NULL) throw aql_profile_exc_msg("CommandWriter allocation failed"); + return p; } -pm4_profile::Pmu* Gfx8Factory::getPmcMgr() { return new pm4_profile::Gfx8PerfCounter(); } +pm4_profile::PerfCounter* Gfx8Factory::getPmcMgr() { + auto p = new pm4_profile::Gfx8PerfCounter(); + if (p == NULL) throw aql_profile_exc_msg("PerfCounter mgr allocation failed"); + return p; +} -pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() { return new pm4_profile::Gfx8ThreadTrace(); } +pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() { + auto p = new pm4_profile::Gfx8ThreadTrace(); + if (p == NULL) throw aql_profile_exc_msg("ThreadTrace mgr allocation failed"); + return p; +} } // aql_profile diff --git 
a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/gfx9_factory.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/gfx9_factory.cpp similarity index 70% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/gfx9_factory.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/gfx9_factory.cpp index 7a293a5e90..c6d51a3cad 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/gfx9_factory.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/gfx9_factory.cpp @@ -11,7 +11,7 @@ namespace aql_profile { // GFX9 block ID mapping table -uint32_t Gfx9Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = { +uint32_t Gfx9Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = { pm4_profile::kHsaAiCounterBlockIdCb0, kBadBlockId /*CPF*/, pm4_profile::kHsaAiCounterBlockIdDb0, @@ -42,14 +42,24 @@ uint32_t Gfx9Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = { pm4_profile::kHsaAiCounterBlockIdTcs, pm4_profile::kHsaAiCounterBlockIdWd, kBadBlockId /*CPG*/, - kBadBlockId /*CPC*/}; + pm4_profile::kHsaAiCounterBlockIdCpc}; pm4_profile::CommandWriter* Gfx9Factory::getCommandWriter() { - return new pm4_profile::gfx9::Gfx9CmdWriter(false, true); + auto p = new pm4_profile::gfx9::Gfx9CmdWriter(false, true); + if (p == NULL) throw aql_profile_exc_msg("CommandWriter allocation failed"); + return p; } -pm4_profile::Pmu* Gfx9Factory::getPmcMgr() { return new pm4_profile::Gfx9PerfCounter(); } +pm4_profile::PerfCounter* Gfx9Factory::getPmcMgr() { + auto p = new pm4_profile::Gfx9PerfCounter(); + if (p == NULL) throw aql_profile_exc_msg("PerfCounter mgr allocation failed"); + return p; +} -pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() { return new pm4_profile::Gfx9ThreadTrace(); } +pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() { + auto p = new pm4_profile::Gfx9ThreadTrace(); + if (p == NULL) throw aql_profile_exc_msg("ThreadTrace mgr allocation failed"); + 
return p; +} } // aql_profile diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/legacy_pm4.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/legacy_pm4.cpp similarity index 99% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/legacy_pm4.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/legacy_pm4.cpp index 9468b3b063..eb4044349d 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/legacy_pm4.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/legacy_pm4.cpp @@ -1,8 +1,9 @@ +#include +#include + #include #include #include -#include -#include #include "aql_profile.h" #include "amd_aql_pm4_ib_packet.h" diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/logger.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/logger.h new file mode 100644 index 0000000000..5e7c8bbaab --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/logger.h @@ -0,0 +1,137 @@ +#ifndef _LOGGER_H_ +#define _LOGGER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace aql_profile { + +class Logger { + public: + template Logger& operator<<(const T& m) { + std::ostringstream oss; + oss << m; + if (!streaming) + log(oss.str()); + else + put(oss.str()); + streaming = true; + return *this; + } + + typedef void (Logger::*manip_t)(); + Logger& operator<<(manip_t f) { + (this->*f)(); + return *this; + } + + void begm() { messaging = true; } + void endl() { resetStreaming(); } + + static const std::string& LastMessage() { + Logger& logger = Instance(); + std::lock_guard lck(mutex); + return logger.message[GetTid()]; + } + + static Logger& Instance() { + std::lock_guard lck(mutex); + if (instance == NULL) instance = new Logger(); + return *instance; + } + + static void Destroy() { + std::lock_guard lck(mutex); + 
if (instance != NULL) delete instance; + instance = NULL; + } + + private: + static uint32_t GetPid() { return syscall(__NR_getpid); } + static uint32_t GetTid() { return syscall(__NR_gettid); } + + Logger() : file(NULL), dirty(false), streaming(false), messaging(false) { + const char* path = getenv("HSA_VEN_AMD_AQLPROFILE_LOG"); + if (path != NULL) { + file = fopen("/tmp/aql_profile_log.txt", "a"); + } + resetStreaming(); + } + + ~Logger() { + if (file != NULL) { + if (dirty) put("\n"); + fclose(file); + } + } + + void resetStreaming() { + std::lock_guard lck(mutex); + if (messaging) { + message[GetTid()] = ""; + } + messaging = false; + streaming = false; + } + + void put(const std::string& m) { + std::lock_guard lck(mutex); + if (messaging) { + message[GetTid()] += m; + } + if (file != NULL) { + dirty = true; + flock(fileno(file), LOCK_EX); + fprintf(file, "%s", m.c_str()); + fflush(file); + flock(fileno(file), LOCK_UN); + } + } + + void log(const std::string& m) { + const time_t rawtime = time(NULL); + tm tm_info; + localtime_r(&rawtime, &tm_info); + char tm_str[26]; + strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info); + std::ostringstream oss; + oss << "\n<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m; + put(oss.str()); + } + + FILE* file; + bool dirty; + bool streaming; + bool messaging; + + static std::mutex mutex; + static Logger* instance; + std::map message; +}; + +} // namespace aql_profile + +#define ERR_LOGGING \ + (aql_profile::Logger::Instance() << aql_profile::Logger::endl \ + << "Error: " << __FUNCTION__ \ + << "(): " << aql_profile::Logger::begm) +#define INFO_LOGGING \ + (aql_profile::Logger::Instance() << aql_profile::Logger::endl \ + << "Info: " << __FUNCTION__ \ + << "(): " << aql_profile::Logger::begm) + +#endif // _LOGGER_H_ diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/pm4_factory.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/pm4_factory.h new file mode 100644 
index 0000000000..0e8b320d0f --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/pm4_factory.h @@ -0,0 +1,157 @@ +#ifndef _PM4_FACTORY_H_ +#define _PM4_FACTORY_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "aql_profile.h" +#include "gpu_block_info.h" +#include "aql_profile_exception.h" + +namespace pm4_profile { +class CommandWriter; +class PerfCounter; +class ThreadTrace; +extern GpuBlockInfo Gfx9HwBlocks[]; +extern const uint32_t Gfx9HwBlockCount; +extern GpuBlockInfo Gfx8HwBlocks[]; +extern const uint32_t Gfx8HwBlockCount; +} + +namespace aql_profile { + +class BlockMap { + public: + typedef std::map map_t; + typedef map_t::const_iterator iter_t; + + void init(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) { + if (block_map.size() == 0) fill(id_table, info_table, info_count); + } + + const pm4_profile::GpuBlockInfo* get(const uint32_t& id) const { + iter_t it = block_map.find(id); + return (it != block_map.end()) ? 
it->second : NULL; + } + + private: + void fill(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) { + map_t info_map; + for (uint32_t i = 0; i < info_count; ++i) { + const pm4_profile::GpuBlockInfo& entry = info_table[i]; + info_map[entry.counterGroupId] = &entry; + } + for (uint32_t i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) { + iter_t it = info_map.find(id_table[i]); + if (it != info_map.end()) block_map[i] = it->second; + } + } + + map_t block_map; +}; + +class Pm4Factory { + public: + enum { kBadBlockId = UINT_MAX }; + + static Pm4Factory* Create(const hsa_agent_t agent); + static Pm4Factory* Create(const profile_t* profile) { return Create(profile->agent); } + static void Destroy(); + + virtual pm4_profile::CommandWriter* getCommandWriter() = 0; + virtual pm4_profile::PerfCounter* getPmcMgr() = 0; + virtual pm4_profile::ThreadTrace* getSqttMgr() = 0; + + const pm4_profile::GpuBlockInfo* getBlockInfo(const event_t* event) const { + const pm4_profile::GpuBlockInfo* info = block_map.get(event->block_name); + if (info == NULL) throw event_exception(std::string("Bad block, "), *event); + if (event->block_index >= info->maxInstanceCount) + throw event_exception(std::string("Bad block index, "), *event); + if (event->counter_id > info->maxEventId) + throw event_exception(std::string("Bad event ID, "), *event); + return info; + } + + uint32_t getBlockId(const event_t* event) const { + return getBlockInfo(event)->counterGroupId + event->block_index; + } + + protected: + explicit Pm4Factory(const BlockMap& map) : block_map(map) {} + virtual ~Pm4Factory() {} + + private: + typedef std::map instances_t; + + static std::mutex mutex; + static instances_t instances; + const BlockMap& block_map; +}; + +class Gfx8Factory : public Pm4Factory { + public: + Gfx8Factory() : Pm4Factory(block_map) { + block_map.init(block_id_table, pm4_profile::Gfx8HwBlocks, pm4_profile::Gfx8HwBlockCount); + } + pm4_profile::CommandWriter* 
getCommandWriter(); + pm4_profile::PerfCounter* getPmcMgr(); + pm4_profile::ThreadTrace* getSqttMgr(); + + private: + static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + BlockMap block_map; +}; + +class Gfx9Factory : public Pm4Factory { + public: + Gfx9Factory() : Pm4Factory(block_map) { + block_map.init(block_id_table, pm4_profile::Gfx9HwBlocks, pm4_profile::Gfx9HwBlockCount); + } + pm4_profile::CommandWriter* getCommandWriter(); + pm4_profile::PerfCounter* getPmcMgr(); + pm4_profile::ThreadTrace* getSqttMgr(); + + private: + static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + BlockMap block_map; +}; +// Returns the factory cached under the agent's name, creating and registering it on first use. +inline Pm4Factory* Pm4Factory::Create(const hsa_agent_t agent) { + std::lock_guard lck(mutex); + + char agent_name[64]; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name); + instances_t::iterator it = instances.find(agent_name); + // Not cached yet: 'it' is end() here and must not be dereferenced, so insert and rebind it. + if (it == instances.end()) { + if (strncmp(agent_name, "gfx801", 6) == 0) { + throw aql_profile_exc_val(std::string("GFX8 Carrizo is not supported "), + agent_name); + } else if (strncmp(agent_name, "gfx8", 4) == 0) { + it = instances.insert(instances_t::value_type(agent_name, new Gfx8Factory())).first; + } else if (strncmp(agent_name, "gfx9", 4) == 0) { + it = instances.insert(instances_t::value_type(agent_name, new Gfx9Factory())).first; + } else { + throw aql_profile_exc_val("Unsupported GFXIP", agent_name); + } + } + + if (it->second == NULL) throw aql_profile_exc_msg("Pm4Factory allocation failed"); + return it->second; +} + +inline void Pm4Factory::Destroy() { + std::lock_guard lck(mutex); + for (auto it : instances) delete it.second; + instances.clear(); +} + +} // namespace aql_profile + +#endif // _PM4_FACTORY_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/populate_aql.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/populate_aql.cpp similarity index 99% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/populate_aql.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/populate_aql.cpp 
index 24aaaf49d5..502b84b780 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/populate_aql.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/core/populate_aql.cpp @@ -1,7 +1,8 @@ +#include + #include #include #include -#include #include "aql_profile.h" #include "cmdwriter.h" diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/CMakeLists.txt similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/CMakeLists.txt diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_block_info.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_block_info.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_block_info.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_block_info.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_block_info.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_block_info.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_block_info.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_block_info.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_perf_counter.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_perf_counter.cpp similarity index 98% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_perf_counter.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_perf_counter.cpp index 729a4aeef4..f25d5ab30b 100644 --- 
a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_perf_counter.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_perf_counter.cpp @@ -18,14 +18,6 @@ using namespace pm4_profile; namespace pm4_profile { -static char errorString[][64] = {{"No error"}, - {"unknow countergroup id"}, - {"no countergroup id"}, - {"invalid operation"}, - {"counter is not available"}, - {"countegroup error state"}, - {"countegroup is not completed"}}; - Gfx8PerfCounter::Gfx8PerfCounter() { // Initialize the number of shader engines num_se_ = 4; @@ -33,8 +25,6 @@ Gfx8PerfCounter::Gfx8PerfCounter() { } void Gfx8PerfCounter::Init() { - error_code_ = 0; - // Initialize the value to use in resetting GRBM regGRBM_GFX_INDEX grbm_gfx_index; grbm_gfx_index.u32All = 0; @@ -49,6 +39,10 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, // Reset Grbm to its default state - broadcast cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, reset_grbm_); + // Reset the counter list + regCP_PERFMON_CNTL cp_perfmon_cntl = {0}; + cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL__CI__VI, cp_perfmon_cntl.u32All); + // Iterate through the list of blocks to generate Pm4 commands to // program corresponding perf counters of each block for (CountersMap::const_iterator block_it = countersMap.begin(); block_it != countersMap.end(); @@ -60,7 +54,6 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, // Iterate through each enabled perf counter and building // corresponding Pm4 commands to program the various control // registers involved - for (uint32_t ind = 0; ind < counter_count; ++ind) { const uint32_t counter_id = counters[ind]; @@ -89,9 +82,6 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, cp_perfcount_enable.u32All); // Reset the counter list - regCP_PERFMON_CNTL cp_perfmon_cntl; - cp_perfmon_cntl.u32All = 0; - 
cp_perfmon_cntl.bits.PERFMON_STATE = 0; cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL__CI__VI, cp_perfmon_cntl.u32All); // Start the counter list @@ -148,16 +138,6 @@ uint32_t Gfx8PerfCounter::end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, return total_counter_num * sizeof(uint32_t); } -int Gfx8PerfCounter::getLastError() { return error_code_; } - -std::string Gfx8PerfCounter::getErrorString(int error) { - if ((error >= 0) && (error < kErrorCodeMax)) { - std::string err_string(errorString[error]); - return err_string; - } - return string("Error input code!"); -} - uint32_t Gfx8PerfCounter::ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId, uint32_t blkCntrIdx) { regGRBM_GFX_INDEX grbm_gfx_index; diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_perf_counter.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_perf_counter.h similarity index 94% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_perf_counter.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_perf_counter.h index 81ba9148d8..ed3c39681d 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx8_perf_counter.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx8_perf_counter.h @@ -11,7 +11,7 @@ class CommandWriter; // This class implement the VI PMU. It is responsible for setting up // CounterGroups to represent each VI hardware block which exposes performance // counters. 
-class Gfx8PerfCounter : public pm4_profile::Pmu { +class Gfx8PerfCounter : public pm4_profile::PerfCounter { public: Gfx8PerfCounter(); @@ -19,10 +19,6 @@ class Gfx8PerfCounter : public pm4_profile::Pmu { // for the blocks featured shader engines instancing uint32_t getNumSe() { return num_se_; } - int getLastError(); - - std::string getErrorString(int error); - void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap); uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap, @@ -64,8 +60,6 @@ class Gfx8PerfCounter : public pm4_profile::Pmu { uint32_t* reg_val); private: - int error_code_; - // Indicates the number of Shader Engines Present uint32_t num_se_; diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_block_info.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_block_info.cpp similarity index 99% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_block_info.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_block_info.cpp index 0843b852d6..bacefc0b01 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_block_info.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_block_info.cpp @@ -56,7 +56,7 @@ GpuBlockInfo Gfx9HwBlocks[] = { AI_COUNTER_NUM_PER_SPI, 0, 0, true, 0, 0, false, 0, 0}, // Counter block SQ - {"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298, + {"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 171, AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0}, {"AI_SQ_GS", kHsaAiCounterBlockIdSqGs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298, AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0}, @@ -251,10 +251,8 @@ GpuBlockInfo Gfx9HwBlocks[] = { // Counter block CPC // Temp commented for Vega10 - /* - {"AI_CPC", 
kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 24, - AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0}, - */ + {"AI_CPC", kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 34, + AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0}, // Counter block IOMMUV2 {"AI_IOMMUV2", kHsaAiCounterBlockIdIommuV2, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 25, diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_block_info.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_block_info.h similarity index 97% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_block_info.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_block_info.h index c5ef546d53..078ef60145 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_block_info.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_block_info.h @@ -192,8 +192,7 @@ typedef enum HsaAiCounterBlockId { // Temp commented out for Vega10 // kHsaAiCounterBlockIdCpg, - // Temp commented out for Vega10 - // kHsaAiCounterBlockIdCpc, + kHsaAiCounterBlockIdCpc, // Counters retrieved by KFD kHsaAiCounterBlockIdIommuV2, @@ -233,12 +232,9 @@ extern GpuCounterRegInfo AiVgtCounterRegAddr[]; extern GpuCounterRegInfo AiIaCounterRegAddr[]; extern GpuCounterRegInfo AiMcCounterRegAddr[]; extern GpuCounterRegInfo AiSrbmCounterRegAddr[]; - // No Tcs Counter block on AI // extern GpuCounterRegInfo AiTcsCounterRegAddr[]; extern GpuCounterRegInfo AiWdCounterRegAddr[]; -extern GpuCounterRegInfo AiCpgCounterRegAddr[]; -extern GpuCounterRegInfo AiCpcCounterRegAddr[]; extern GpuPrivCounterBlockId AiBlockIdSq; extern GpuPrivCounterBlockId AiBlockIdMc; diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_perf_counter.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_perf_counter.cpp 
similarity index 97% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_perf_counter.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_perf_counter.cpp index f5d34f49eb..eb5e30cab0 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_perf_counter.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_perf_counter.cpp @@ -19,14 +19,6 @@ using namespace pm4_profile::gfx9; namespace pm4_profile { -static char errorString[][64] = {{"No error"}, - {"unknow countergroup id"}, - {"no countergroup id"}, - {"invalid operation"}, - {"counter is not available"}, - {"countegroup error state"}, - {"countegroup is not completed"}}; - Gfx9PerfCounter::Gfx9PerfCounter() { // Initialize the number of shader engines num_se_ = 4; @@ -34,8 +26,6 @@ Gfx9PerfCounter::Gfx9PerfCounter() { } void Gfx9PerfCounter::Init() { - error_code_ = 0; - // Initialize the value to use in resetting GRBM regGRBM_GFX_INDEX grbm_gfx_index; grbm_gfx_index.u32All = 0; @@ -54,6 +44,10 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, // On Vega this is needed to collect Perf Cntrs cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmRLC_PERFMON_CLK_CNTL, 1); + // Reset the counter list + regCP_PERFMON_CNTL cp_perfmon_cntl = {0}; + cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL, cp_perfmon_cntl.u32All); + // Iterate through the list of blocks to generate Pm4 commands to // program corresponding perf counters of each block for (CountersMap::const_iterator block_it = countersMap.begin(); block_it != countersMap.end(); @@ -65,7 +59,6 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, // Iterate through each enabled perf counter and building // corresponding Pm4 commands to program the various control // registers involved - for (uint32_t ind = 0; ind < counter_count; ++ind) { const uint32_t counter_id = counters[ind]; 
@@ -93,9 +86,6 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, cmdWriter->BuildWriteShRegPacket(cmdBuff, mmCOMPUTE_PERFCOUNT_ENABLE, cp_perfcount_enable.u32All); // Reset the counter list - regCP_PERFMON_CNTL cp_perfmon_cntl; - cp_perfmon_cntl.u32All = 0; - cp_perfmon_cntl.bits.PERFMON_STATE = 0; cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL, cp_perfmon_cntl.u32All); // Start the counter list @@ -156,16 +146,6 @@ uint32_t Gfx9PerfCounter::end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, return total_counter_num * sizeof(uint32_t); } -int Gfx9PerfCounter::getLastError() { return error_code_; } - -std::string Gfx9PerfCounter::getErrorString(int error) { - if ((error >= 0) && (error < kErrorCodeMax)) { - std::string err_string(errorString[error]); - return err_string; - } - return string("Error input code!"); -} - uint32_t Gfx9PerfCounter::ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId, uint32_t blkCntrIdx) { regGRBM_GFX_INDEX grbm_gfx_index; @@ -737,17 +717,15 @@ uint32_t Gfx9PerfCounter::BuildCounterSelRegister(uint32_t cntrIdx, uint32_t* re */ // Temp commented for Vega10 - /* case kHsaAiCounterBlockIdCpc: { regCPC_PERFCOUNTER0_SELECT cpc_perf_counter_select; cpc_perf_counter_select.u32All = 0; - cpc_perf_counter_select.bits.PERF_SEL = blkCntrIdx; + cpc_perf_counter_select.bits.CNTR_SEL0 = blkCntrIdx; regVal[0] = cpc_perf_counter_select.u32All; regAddr[0] = AiCpcCounterRegAddr[cntrIdx].counterSelRegAddr; regIdx = 1; break; } - */ /* case kHsaAiCounterBlockIdMc: { @@ -1316,7 +1294,6 @@ uint32_t Gfx9PerfCounter::BuildCounterReadRegisters(uint32_t reg_index, uint32_t */ // Temp commented for Vega10 - /* case kHsaAiCounterBlockIdCpc: { reg_addr[reg_num] = mmGRBM_GFX_INDEX; reg_val[reg_num] = reset_grbm_; @@ -1331,7 +1308,6 @@ uint32_t Gfx9PerfCounter::BuildCounterReadRegisters(uint32_t reg_index, uint32_t reg_num++; break; } - */ // IommuV2, MC, kernel driver counters are 
retrieved via // KFD implementation diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_perf_counter.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_perf_counter.h similarity index 90% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_perf_counter.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_perf_counter.h index 14bd82968d..ddebd331a7 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gfx9_perf_counter.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gfx9_perf_counter.h @@ -11,23 +11,17 @@ class CommandWriter; // This class implement the AI PMU. It is responsible for setting up // CounterGroups to represent each AI hardware block which exposes performance // counters. -class Gfx9PerfCounter : public pm4_profile::Pmu { +class Gfx9PerfCounter : public pm4_profile::PerfCounter { public: Gfx9PerfCounter(); - // Returns number of shader engines per block - // for the blocks featured shader engines instancing - uint32_t getNumSe() { return num_se_; } - - int getLastError(); - - std::string getErrorString(int error); - void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap); uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap, void* dataBuff); + uint32_t getNumSe() { return num_se_; } + private: void Init(); @@ -64,8 +58,6 @@ class Gfx9PerfCounter : public pm4_profile::Pmu { uint32_t* reg_val); private: - int error_code_; - // Indicates the number of Shader Engines Present uint32_t num_se_; diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gpu_block_info.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gpu_block_info.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/gpu_block_info.h rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/gpu_block_info.h diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/perf_counter.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/perf_counter.h new file mode 100644 index 0000000000..601a86de87 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/perfcounter/perf_counter.h @@ -0,0 +1,35 @@ +#ifndef _HSA_PERF_H_ +#define _HSA_PERF_H_ + +#include + +#include +#include +#include + +namespace pm4_profile { +class DefaultCmdBuf; +class CommandWriter; + +typedef std::vector CountersVec; +typedef std::map CountersMap; + +class PerfCounter { + public: + virtual ~PerfCounter() {} + + // Generate start profiling commands. + virtual void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, + const CountersMap& countersMap) = 0; + + // Generate stop profiling commands. + // Return actual required data buffer size. + virtual uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, + const CountersMap& countersMap, void* dataBuff) = 0; + + // Returns number of shader engines per block + // for the blocks featured shader engines instancing + virtual uint32_t getNumSe() = 0; +}; +} // namespace pm4_profile +#endif // _HSA_PERF_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/CMakeLists.txt similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/CMakeLists.txt diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx8_thread_trace.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx8_thread_trace.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx8_thread_trace.cpp rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx8_thread_trace.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx8_thread_trace.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx8_thread_trace.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx8_thread_trace.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx8_thread_trace.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx9_thread_trace.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx9_thread_trace.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx9_thread_trace.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx9_thread_trace.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx9_thread_trace.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx9_thread_trace.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/gfx9_thread_trace.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/gfx9_thread_trace.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/thread_trace.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/thread_trace.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/thread_trace.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/thread_trace.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/thread_trace.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/thread_trace.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/threadtrace/thread_trace.h rename to 
projects/rocr-runtime/runtime/hsa-amd-aqlprofile/src/threadtrace/thread_trace.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/CMakeLists.txt similarity index 92% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/CMakeLists.txt index eaa480f644..0b4e0461db 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/CMakeLists.txt @@ -28,7 +28,6 @@ include_directories ( ${TEST_DIR}/${TEST_NAME} ) set ( LIB_NAME "${TEST_NAME}${ONLY64STR}" ) add_library ( ${LIB_NAME} STATIC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp ) target_link_libraries( ${LIB_NAME} c stdc++ ) -execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" ) set ( TEST_LIBS ${LIB_NAME} ) # @@ -37,7 +36,13 @@ set ( TEST_LIBS ${LIB_NAME} ) set ( SRC_LIST ${TEST_DIR}/ctrl/test.cpp ) set ( SRC_LIST ${SRC_LIST} ${TEST_DIR}/ctrl/test_pmgr.cpp ) set ( SRC_LIST ${SRC_LIST} ${TEST_DIR}/ctrl/test_hsa.cpp ) -set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} ${TARGET_LIB} ) +set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} ) set ( EXE_NAME "ctrl" ) add_executable ( ${EXE_NAME} ${SRC_LIST} ) target_link_libraries( ${EXE_NAME} ${LIB_LIST} c stdc++ dl pthread rt atomic ) + +# +# Copy the test files +# +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" ) +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" ) diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/binary_search/binary_search.cc b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/binary_search/binary_search.cc similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/binary_search/binary_search.cc 
rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/binary_search/binary_search.cc diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/binary_search/binary_search_kernels.cl b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/binary_search/binary_search_kernels.cl similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/binary_search/binary_search_kernels.cl rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/binary_search/binary_search_kernels.cl diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_aql.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_aql.h similarity index 97% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_aql.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_aql.h index bee82a5867..4139734afc 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_aql.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_aql.h @@ -30,14 +30,14 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "hsa.h" #include "hsa_rsrc_factory.h" -#include "hsa_ext_amd_aql_profile.h" +#include "hsa_ven_amd_aqlprofile.h" // Test AQL interface class TestAql { TestAql* const test_aql; public: - TestAql(TestAql* t = 0) : test_aql(t) {} + explicit TestAql(TestAql* t = 0) : test_aql(t) {} virtual ~TestAql() {} TestAql* testAql() { return test_aql; } diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_assert.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_assert.h similarity index 91% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_assert.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_assert.h index 96292a68ed..37d012720d 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_assert.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_assert.h @@ -6,7 +6,7 @@ if (!(cond)) { \ std::cout << "ASSERT FAILED(" << #cond << ") at \"" << __FILE__ << "\" line " << __LINE__ \ << std::endl; \ - abort(); \ + exit(-1); \ } \ } diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_hsa.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_hsa.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_hsa.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_hsa.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_hsa.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_hsa.h similarity index 98% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_hsa.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_hsa.h index 13a4767671..7ca894de40 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_hsa.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_hsa.h @@ -36,7 +36,7 @@ OF THE POSSIBILITY OF SUCH 
DAMAGE. class TestHSA : public TestAql { public: // Constructor - TestHSA(TestKernel* test) : test_(test), name_(test->Name()) { + explicit TestHSA(TestKernel* test) : test_(test), name_(test->Name()) { total_time_taken_ = 0; setup_time_taken_ = 0; dispatch_time_taken_ = 0; diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_kernel.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_kernel.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_kernel.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_kernel.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen.h similarity index 96% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen.h index 8102c3cf0f..662dbd2c2e 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen.h @@ -29,18 +29,17 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
#define _TEST_PGEN_H_ #include "test_pmgr.h" -#include "hsa_ext_amd_aql_profile.h" // SimpleConvolution: Class implements OpenCL SimpleConvolution sample class TestPGen : public TestPMgr { + protected: typedef hsa_ext_amd_aql_pm4_packet_t packet_t; - protected: packet_t* PrePacket() { return reinterpret_cast(&prePacket); } packet_t* PostPacket() { return reinterpret_cast(&postPacket); } public: - TestPGen(TestAql* t) : TestPMgr(t) {} + explicit TestPGen(TestAql* t) : TestPMgr(t) {} }; #endif // _TEST_PGEN_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen_pmc.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen_pmc.h similarity index 61% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen_pmc.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen_pmc.h index d83591f104..e62b5bb172 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen_pmc.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen_pmc.h @@ -31,11 +31,13 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "test_assert.h" #include "test_pgen.h" -hsa_status_t TestPGenPMC_Callback(hsa_ext_amd_aql_profile_info_type_t info_type, - hsa_ext_amd_aql_profile_info_data_t* info_data, +#include + +hsa_status_t TestPGenPMC_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, void* callback_data) { hsa_status_t status = HSA_STATUS_SUCCESS; - typedef std::vector passed_data_t; + typedef std::vector passed_data_t; reinterpret_cast(callback_data)->push_back(*info_data); return status; } @@ -45,29 +47,30 @@ class TestPGenPMC : public TestPGen { const static uint32_t buffer_alignment = 0x1000; // 4K hsa_agent_t agent; - hsa_ext_amd_aql_profile_profile_t profile; - hsa_ext_amd_aql_profile_event_t events[2]; + hsa_ven_amd_aqlprofile_profile_t profile; + hsa_ven_amd_aqlprofile_event_t* events; bool buildPackets() { return true; } bool dumpData() { std::cout << "TestPGenPMC::dumpData :" << std::endl; - typedef std::vector callback_data_t; + typedef std::vector callback_data_t; callback_data_t data; - hsa_ext_amd_aql_profile_iterate_data(&profile, TestPGenPMC_Callback, &data); + api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenPMC_Callback, &data); for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) { - std::cout << "> sample(" << dec << it->sample_id << ") block(" - << it->pmc_data.event.block_name << "_" << it->pmc_data.event.block_index - << ") result(" << hex << it->pmc_data.result << ")" << std::endl; + std::cout << dec << "event( block(" << it->pmc_data.event.block_name << "_" + << it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id + << ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")" + << std::endl; } return true; } public: - TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; } + explicit TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; } bool initialize(int arg_cnt, char** 
arg_list) { if (!TestPMgr::initialize(arg_cnt, arg_list)) return false; @@ -85,31 +88,43 @@ class TestPGenPMC : public TestPGen { // Instantiation of the profile object // ////////////////////////////////////////////////////////////// // Set the event fields - events[0].block_name = HSA_EXT_AQL_PROFILE_BLOCK_SQ; - events[0].block_index = 0; - events[0].counter_id = 0x4; // SQ_SQ_PERF_SEL_WAVES - events[1].block_name = HSA_EXT_AQL_PROFILE_BLOCK_SQ; - events[1].block_index = 0; - events[1].counter_id = 0xe; // SQ_SQ_PERF_SEL_ITEMS + const hsa_ven_amd_aqlprofile_event_t events_arr[] = { + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 14 /*ITEMS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 47 /*WAVE_READY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 1 /*CYCLE*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 3 /*REQ*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 22 /*WRITEBACK*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS_COUNT*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 8 /*ME1_STALL_WAIT_ON_RCIU_READ*/}, + }; + const size_t event_count = sizeof(events_arr) / sizeof(hsa_ven_amd_aqlprofile_event_t); + events = new hsa_ven_amd_aqlprofile_event_t[event_count]; + memcpy(events, events_arr, sizeof(events_arr)); // Initialization the profile memset(&profile, 0, sizeof(profile)); profile.agent = agent; - profile.type = HSA_EXT_AQL_PROFILE_EVENT_PMC; + profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC; // set enabled events list profile.events = events; - profile.event_count = 2; + profile.event_count = event_count; // Profile buffers attributes command_buffer_alignment = buffer_alignment; - status = hsa_ext_amd_aql_profile_get_info( - &profile, HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size); + status = api.hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size); + if (status != HSA_STATUS_SUCCESS) { + const char* 
str = ""; + api.hsa_ven_amd_aqlprofile_error_string(&str); + std::cout << "aqlprofile err: " << str << std::endl; + } test_assert(status == HSA_STATUS_SUCCESS); output_buffer_alignment = buffer_alignment; - status = hsa_ext_amd_aql_profile_get_info(&profile, HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE, - &output_buffer_size); + status = api.hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE, &output_buffer_size); test_assert(status == HSA_STATUS_SUCCESS); // Application is allocating the command buffer @@ -128,12 +143,17 @@ class TestPGenPMC : public TestPGen { memset(profile.output_buffer.ptr, 0x77, output_buffer_size); // Populating the AQL start packet - status = hsa_ext_amd_aql_profile_start(&profile, PrePacket()); + status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket()); + if (status != HSA_STATUS_SUCCESS) { + const char* str = ""; // initialized so the log below never streams an indeterminate pointer + api.hsa_ven_amd_aqlprofile_error_string(&str); + std::cout << "aqlprofile err: " << str << std::endl; + } test_assert(status == HSA_STATUS_SUCCESS); if (status != HSA_STATUS_SUCCESS) return false; // Populating the AQL stop packet - status = hsa_ext_amd_aql_profile_stop(&profile, PostPacket()); + status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket()); test_assert(status == HSA_STATUS_SUCCESS); return (status == HSA_STATUS_SUCCESS); diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen_sqtt.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen_sqtt.h similarity index 85% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen_sqtt.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen_sqtt.h index 6cdf0ce96e..88f603c468 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pgen_sqtt.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pgen_sqtt.h @@ -31,15 +31,16 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include "test_assert.h" #include "test_pgen.h" -hsa_status_t TestPGenSQTT_Callback(hsa_ext_amd_aql_profile_info_type_t info_type, - hsa_ext_amd_aql_profile_info_data_t* info_data, +hsa_status_t TestPGenSQTT_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, void* callback_data) { hsa_status_t status = HSA_STATUS_SUCCESS; - typedef std::vector passed_data_t; + typedef std::vector passed_data_t; reinterpret_cast(callback_data)->push_back(*info_data); return status; } @@ -50,17 +51,17 @@ class TestPGenSQTT : public TestPGen { const static uint32_t buffer_size = 0x2000000; // 32M hsa_agent_t agent; - hsa_ext_amd_aql_profile_profile_t profile; + hsa_ven_amd_aqlprofile_profile_t profile; bool buildPackets() { return true; } bool dumpData() { std::cout << "TestPGenSQTT::dumpData :" << std::endl; - typedef std::vector callback_data_t; + typedef std::vector callback_data_t; callback_data_t data; - hsa_ext_amd_aql_profile_iterate_data(&profile, TestPGenSQTT_Callback, &data); + api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenSQTT_Callback, &data); for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) { std::cout << "> sample(" << dec << it->sample_id << ") ptr(" << hex << it->sqtt_data.ptr << ") size(" << dec << it->sqtt_data.size << ")" << std::endl; @@ -93,7 +94,7 @@ class TestPGenSQTT : public TestPGen { } public: - TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; } + explicit TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; } bool initialize(int arg_cnt, char** arg_list) { if (!TestPMgr::initialize(arg_cnt, arg_list)) return false; @@ -116,7 +117,7 @@ class TestPGenSQTT : public TestPGen { // Initialization the profile memset(&profile, 0, sizeof(profile)); profile.agent = agent; - profile.type = HSA_EXT_AQL_PROFILE_EVENT_SQTT; + profile.type = 
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT; // set parameters // profile.parameters = &event; @@ -124,8 +125,8 @@ class TestPGenSQTT : public TestPGen { // Profile buffers attributes command_buffer_alignment = buffer_alignment; - status = hsa_ext_amd_aql_profile_get_info( - &profile, HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size); + status = api.hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size); test_assert(status == HSA_STATUS_SUCCESS); output_buffer_alignment = buffer_alignment; @@ -146,12 +147,12 @@ class TestPGenSQTT : public TestPGen { profile.output_buffer.size = output_buffer_size; // Populating the AQL start packet - status = hsa_ext_amd_aql_profile_start(&profile, PrePacket()); + status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket()); test_assert(status == HSA_STATUS_SUCCESS); if (status != HSA_STATUS_SUCCESS) return false; // Populating the AQL stop packet - status = hsa_ext_amd_aql_profile_stop(&profile, PostPacket()); + status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket()); test_assert(status == HSA_STATUS_SUCCESS); return (status == HSA_STATUS_SUCCESS); diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pmgr.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pmgr.cpp similarity index 93% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pmgr.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pmgr.cpp index cef2b8cc5c..3f95b449c0 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pmgr.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pmgr.cpp @@ -61,7 +61,7 @@ bool TestPMgr::addPacketGfx8(const packet_t* packet) { // Create legacy devices PM4 data const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet; slot_pm4_s data; - 
hsa_ext_amd_aql_profile_legacy_get_pm4(aql_packet, (void*)data.words); + api.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast(data.words)); // Compute the write index of queue and copy Aql packet into it uint64_t que_idx = hsa_queue_load_write_index_relaxed(getQueue()); @@ -122,4 +122,9 @@ bool TestPMgr::initialize(int argc, char** argv) { TestPMgr::TestPMgr(TestAql* t) : TestAql(t) { dummySignal.handle = 0; postSignal = dummySignal; + + hsa_status_t status = hsa_init(); + test_assert(status == HSA_STATUS_SUCCESS); + status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api); + test_assert(status == HSA_STATUS_SUCCESS); } diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pmgr.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pmgr.h similarity index 85% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pmgr.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pmgr.h index 15ea509cb8..c74f1fd2eb 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/ctrl/test_pmgr.h +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/ctrl/test_pmgr.h @@ -25,18 +25,20 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
*******************************************************************************/ -#ifndef _TEST_SMGR_H_ -#define _TEST_SMGR_H_ +#ifndef _TEST_PMGR_H_ +#define _TEST_PMGR_H_ #include +#include "hsa.h" #include "test_aql.h" +#include "hsa_ven_amd_aqlprofile.h" // SimpleConvolution: Class implements OpenCL SimpleConvolution sample class TestPMgr : public TestAql { public: typedef hsa_ext_amd_aql_pm4_packet_t packet_t; - TestPMgr(TestAql* t); + explicit TestPMgr(TestAql* t); bool run(); protected: @@ -45,14 +47,16 @@ class TestPMgr : public TestAql { hsa_signal_t dummySignal; hsa_signal_t postSignal; + hsa_ven_amd_aqlprofile_1_00_pfn_t api; + virtual bool buildPackets() { return false; } virtual bool dumpData() { return false; } virtual bool initialize(int argc, char** argv); private: enum { - SLOT_PM4_SIZE_DW = HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t), - SLOT_PM4_SIZE_AQLP = HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t) + SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t), + SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t) }; struct slot_pm4_s { uint32_t words[SLOT_PM4_SIZE_DW]; @@ -64,4 +68,4 @@ class TestPMgr : public TestAql { bool addPacketGfx9(const packet_t* packet); }; -#endif // _TEST_SMGR_H_ +#endif // _TEST_PMGR_H_ diff --git a/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/run.sh b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/run.sh new file mode 100755 index 0000000000..54ead87204 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/run.sh @@ -0,0 +1,30 @@ +#/bin/sh +set -x + +tbin=./test/ctrl + +CDIR=`pwd` +export LD_LIBRARY_PATH=$CDIR + +export HSA_ENABLE_SDMA=0 +export HSA_EMULATE_AQL=1 + +echo +echo "Run simple convolution kernel" +unset ROCR_ENABLE_PMC +unset ROCR_ENABLE_SQTT +eval $tbin + +echo +echo "Run with PMC" +export ROCR_ENABLE_PMC=1 +unset ROCR_ENABLE_SQTT +eval $tbin + +echo +echo "Run with SQTT" 
+unset ROCR_ENABLE_PMC +export ROCR_ENABLE_SQTT=1 +eval $tbin + + diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/gfx8_simpleConvolution.hsaco b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/gfx8_simpleConvolution.hsaco similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/gfx8_simpleConvolution.hsaco rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/gfx8_simpleConvolution.hsaco diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/gfx9_simpleConvolution.hsaco b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/gfx9_simpleConvolution.hsaco similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/gfx9_simpleConvolution.hsaco rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/gfx9_simpleConvolution.hsaco diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.cl b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.cl similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.cl rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.cl diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.h 
b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.hsail b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.hsail similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/simple_convolution/simple_convolution.hsail rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/simple_convolution/simple_convolution.hsail diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/CMakeLists.txt similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/CMakeLists.txt rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/CMakeLists.txt diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/helper_funcs.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/helper_funcs.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/helper_funcs.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/helper_funcs.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/helper_funcs.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/helper_funcs.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/helper_funcs.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/helper_funcs.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/hsa_rsrc_factory.cpp 
b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/hsa_rsrc_factory.cpp similarity index 99% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/hsa_rsrc_factory.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/hsa_rsrc_factory.cpp index e1163b8542..6c0cd31825 100644 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/hsa_rsrc_factory.cpp +++ b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/hsa_rsrc_factory.cpp @@ -12,7 +12,6 @@ #include "hsa.h" #include "hsa_rsrc_factory.h" #include "hsa_ext_finalize.h" -#include "hsa_ext_profiler.h" using namespace std; diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/hsa_rsrc_factory.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/hsa_rsrc_factory.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/hsa_rsrc_factory.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/hsa_rsrc_factory.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/perf_timer.cpp b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/perf_timer.cpp similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/perf_timer.cpp rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/perf_timer.cpp diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/perf_timer.h b/projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/perf_timer.h similarity index 100% rename from projects/rocr-runtime/runtime/hsa-ext-aql-profile/test/util/perf_timer.h rename to projects/rocr-runtime/runtime/hsa-amd-aqlprofile/test/util/perf_timer.h diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/doc/HSA_ext_profile_api_v1_1_0.docx b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/doc/HSA_ext_profile_api_v1_1_0.docx deleted file mode 100644 index 35ae8579aa..0000000000 Binary files 
a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/doc/HSA_ext_profile_api_v1_1_0.docx and /dev/null differ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/inc/hsa_ext_amd_aql_profile.h b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/inc/hsa_ext_amd_aql_profile.h deleted file mode 100644 index abeb2d88db..0000000000 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/inc/hsa_ext_amd_aql_profile.h +++ /dev/null @@ -1,267 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// Copyright 2017 ADVANCED MICRO DEVICES, INC. -// -// AMD is granting you permission to use this software and documentation(if any) -// (collectively, the "Materials") pursuant to the terms and conditions of the -// Software License Agreement included with the Materials.If you do not have a -// copy of the Software License Agreement, contact your AMD representative for a -// copy. -// -// You agree that you will not reverse engineer or decompile the Materials, in -// whole or in part, except as allowed by applicable law. -// -// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF -// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY, -// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE -// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM -// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF -// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion -// of implied warranties, so the above exclusion may not apply to You. 
-// -// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT, -// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT, -// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF -// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN -// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total -// liability to You for all damages, losses, and causes of action (whether in -// contract, tort (including negligence) or otherwise) exceed the amount of $100 -// USD. You agree to defend, indemnify and hold harmless AMD and its licensors, -// and any of their directors, officers, employees, affiliates or agents from -// and against any and all loss, damage, liability and other expenses (including -// reasonable attorneys' fees), resulting from Your use of the Software or -// violation of the terms and conditions of this Agreement. -// -// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with -// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is -// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 - -// 7013, et seq., or its successor.Use of the Materials by the Government -// constitutes acknowledgement of AMD's proprietary rights in them. -// -// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as -// stated in the Software License Agreement. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _HSA_EXT_AMD_AQL_PROFILE_H_ -#define _HSA_EXT_AMD_AQL_PROFILE_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/////////////////////////////////////////////////////////////////////// -// Library API: -// The library provides helper methods for instantiation of -// the profile context object and for populating of the start -// and stop AQL packets. 
The profile object contains a profiling -// events list and needed for profiling buffers descriptors, -// a command buffer and an output data buffer. To check if there -// was an error the library methods return a status code. Also -// the library provides methods for querying required buffers -// attributes, to validate the event attributes and to get profiling -// output data. -// -// Returned status: -// hsa_status_t – HSA status codes are used from hsa.h header -// -// Supported profiling features: -// -// Supported profiling events -typedef enum { - HSA_EXT_AQL_PROFILE_EVENT_PMC, - HSA_EXT_AQL_PROFILE_EVENT_SQTT -} hsa_ext_amd_aql_profile_event_type_t; - -// Supported performance counters (PMC) blocks -// The block ID is the same for a block instances set, for example -// each block instance from the TCC block set, TCC0, TCC1, …, TCCN -// will have the same block ID HSA_EXT_AQL_PROFILE_BLOCKS_TCC. -typedef enum { - HSA_EXT_AQL_PROFILE_BLOCK_CB, - HSA_EXT_AQL_PROFILE_BLOCK_CPF, - HSA_EXT_AQL_PROFILE_BLOCK_DB, - HSA_EXT_AQL_PROFILE_BLOCK_GRBM, - HSA_EXT_AQL_PROFILE_BLOCK_GRBMSE, - HSA_EXT_AQL_PROFILE_BLOCK_PASU, - HSA_EXT_AQL_PROFILE_BLOCK_PASC, - HSA_EXT_AQL_PROFILE_BLOCK_SPI, - HSA_EXT_AQL_PROFILE_BLOCK_SQ, - HSA_EXT_AQL_PROFILE_BLOCK_SQES, - HSA_EXT_AQL_PROFILE_BLOCK_SQGS, - HSA_EXT_AQL_PROFILE_BLOCK_SQVS, - HSA_EXT_AQL_PROFILE_BLOCK_SQPS, - HSA_EXT_AQL_PROFILE_BLOCK_SQLS, - HSA_EXT_AQL_PROFILE_BLOCK_SQHS, - HSA_EXT_AQL_PROFILE_BLOCK_SQCS, - HSA_EXT_AQL_PROFILE_BLOCK_SX, - HSA_EXT_AQL_PROFILE_BLOCK_TA, - HSA_EXT_AQL_PROFILE_BLOCK_TCA, - HSA_EXT_AQL_PROFILE_BLOCK_TCC, - HSA_EXT_AQL_PROFILE_BLOCK_TD, - HSA_EXT_AQL_PROFILE_BLOCK_TCP, - HSA_EXT_AQL_PROFILE_BLOCK_GDS, - HSA_EXT_AQL_PROFILE_BLOCK_VGT, - HSA_EXT_AQL_PROFILE_BLOCK_IA, - HSA_EXT_AQL_PROFILE_BLOCK_MC, - HSA_EXT_AQL_PROFILE_BLOCK_SRBM, - HSA_EXT_AQL_PROFILE_BLOCK_TCS, - HSA_EXT_AQL_PROFILE_BLOCK_WD, - HSA_EXT_AQL_PROFILE_BLOCK_CPG, - HSA_EXT_AQL_PROFILE_BLOCK_CPC, - HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER -} 
hsa_ext_amd_aql_profile_block_name_t; - -// PMC event object structure -// ‘counter_id’ value is specified in GFXIPs perfcounter user guides -// which is the counters select value, “Performance Counters Selection” -// chapter. -typedef struct { - hsa_ext_amd_aql_profile_block_name_t block_name; - uint32_t block_index; - uint32_t counter_id; -} hsa_ext_amd_aql_profile_event_t; - -// Check if event is valid for the specific GPU -hsa_status_t hsa_ext_amd_aql_profile_validate_event( - hsa_agent_t agent, // HSA handle for the profiling GPU - const hsa_ext_amd_aql_profile_event_t* event, // Pointer on validated event - bool* result); // True if the event valid, False otherwise - -// Profiling parameters -// All parameters are generic and if not applicable for a specific -// profile configuration then error status will be returned. -typedef enum { - // SQTT applicable parameters - HSA_EXT_AQL_PROFILE_PARAM_COMPUTE_UNIT_TARGET, - HSA_EXT_AQL_PROFILE_PARAM_VM_ID_MASK, - HSA_EXT_AQL_PROFILE_PARAM_MASK, - HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK, - HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK2 -} hsa_ext_amd_aql_profile_parameter_name_t; - -// Profile parameter object -typedef struct { - hsa_ext_amd_aql_profile_parameter_name_t parameter_name; - uint32_t value; -} hsa_ext_amd_aql_profile_parameters_t; - -// -// Profile context object: -// The library provides a profile object structure which contains -// the events array, a buffer for the profiling start/stop commands -// and a buffer for the output data. -// The buffers are specified by the buffer descriptors and allocated -// by the application. The buffers allocation attributes, the command -// buffer size, the PMC output buffer size as well as profiling output -// data can be get using the generic get profile info helper _get_info. 
-// -// Buffer descriptor -typedef struct { - void* ptr; - uint32_t size; -} hsa_ext_amd_aql_profile_descriptor_t; - -// Profile context object structure, contains profiling events list and -// needed for profiling buffers descriptors, a command buffer and -// an output data buffer -typedef struct { - hsa_agent_t agent; // GFXIP handle - hsa_ext_amd_aql_profile_event_type_t type; // Events type - const hsa_ext_amd_aql_profile_event_t* events; // Events array - uint32_t event_count; // Events count - const hsa_ext_amd_aql_profile_parameters_t* parameters; // Parameters array - uint32_t parameter_count; // Parameters count - hsa_ext_amd_aql_profile_descriptor_t output_buffer; // Output buffer - hsa_ext_amd_aql_profile_descriptor_t command_buffer; // PM4 commands -} hsa_ext_amd_aql_profile_profile_t; - -// -// AQL packets populating methods: -// The helper methods to populate provided by the application START and -// STOP AQL packets which the application is required to submit before and -// after profiled GPU task packets respectively. 
-// -// AQL Vendor Specific packet which carries a PM4 command -typedef struct { - uint16_t header; - uint16_t pm4_command[27]; - hsa_signal_t completion_signal; -} hsa_ext_amd_aql_pm4_packet_t; - -// Method to populate the provided AQL packet with profiling start commands -// Only 'pm4_command' fields of the packet are set and the application -// is responsible to set Vendor Specific header type a completion signal -hsa_status_t hsa_ext_amd_aql_profile_start( - const hsa_ext_amd_aql_profile_profile_t* profile, // [in] profile contex object - hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet - -// Method to populate the provided AQL packet with profiling stop commands -// Only 'pm4_command' fields of the packet are set and the application -// is responsible to set Vendor Specific header type and a completion signal -hsa_status_t hsa_ext_amd_aql_profile_stop( - const hsa_ext_amd_aql_profile_profile_t* profile, // [in] profile contex object - hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet - -// Legacy devices, PM4 profiling packet size -const unsigned HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE = 192; -// Legacy devices, converting the profiling AQL packet to PM4 packet blob -hsa_status_t hsa_ext_amd_aql_profile_legacy_get_pm4( - const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet - void* data); // [out] PM4 packet blob - -// -// Get profile info: -// Generic method for getting various profile info including profile buffers -// attributes like the command buffer size and the profiling PMC results. -// It’s implied that all counters are 64bit values. 
-// -// Profile generic output data: -typedef struct { - uint32_t sample_id; // PMC sample of SQTT buffer index - union { - struct { - hsa_ext_amd_aql_profile_event_t event; // PMC event - uint64_t result; // PMC result - } pmc_data; - hsa_ext_amd_aql_profile_descriptor_t sqtt_data; // SQTT output data descriptor - }; -} hsa_ext_amd_aql_profile_info_data_t; - -// Profile attributes -typedef enum { - HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, // get_info returns uint32_t value - HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE, // get_info returns uint32_t value - HSA_EXT_AQL_PROFILE_INFO_PMC_DATA, // get_info returns PMC uint64_t value - // in info_data object - HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA // get_info returns SQTT buffer ptr/size - // in info_data object -} hsa_ext_amd_aql_profile_info_type_t; - -// Definition of output data iterator callback -typedef hsa_status_t (*hsa_ext_amd_aql_profile_data_callback_t)( - hsa_ext_amd_aql_profile_info_type_t info_type, // [in] data type, PMC or SQTT data - hsa_ext_amd_aql_profile_info_data_t* info_data, // [in] info_data object - void* callback_data); // [in/out] data passed to the callback - -// Method for getting the profile info -hsa_status_t hsa_ext_amd_aql_profile_get_info( - const hsa_ext_amd_aql_profile_profile_t* profile, // [in] profile context object - hsa_ext_amd_aql_profile_info_type_t attribute, // [in] requested profile attribute - void* value); // [in/out] returned value - -// Method for iterating the events output data -hsa_status_t hsa_ext_amd_aql_profile_iterate_data( - const hsa_ext_amd_aql_profile_profile_t* profile, // [in] profile context object - hsa_ext_amd_aql_profile_data_callback_t callback, // [in] callback to iterate the output data - void* data); // [in/out] data passed to the callback - -#ifdef __cplusplus -} -#endif // __cplusplus - -#endif // _HSA_EXT_AMD_AQL_PROFILE_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile.h 
b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile.h deleted file mode 100644 index 2764a0e702..0000000000 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _AQL_PROFILE_H_ -#define _AQL_PROFILE_H_ - -#include "hsa_ext_amd_aql_profile.h" - -namespace pm4_profile { -class CommandWriter; -} - -namespace aql_profile { -typedef hsa_ext_amd_aql_profile_descriptor_t descriptor_t; -typedef hsa_ext_amd_aql_profile_profile_t profile_t; -typedef hsa_ext_amd_aql_profile_info_type_t info_type_t; -typedef hsa_ext_amd_aql_profile_data_callback_t data_callback_t; -typedef hsa_ext_amd_aql_pm4_packet_t packet_t; -typedef hsa_ext_amd_aql_profile_event_t event_t; - -void populateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_profile::CommandWriter* cmd_writer, - packet_t* aql_packet); -void* legacyAqlAcquire(const packet_t* aql_packet, void* data); -void* legacyAqlRelease(const packet_t* aql_packet, void* data); -void* legacyPm4(const packet_t* aql_packet, void* data); -} - -#endif // _AQL_PROFILE_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile_exception.h b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile_exception.h deleted file mode 100644 index bfba9dd0df..0000000000 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/aql_profile_exception.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _AQL_PROFILE_EXCEPTION_H_ -#define _AQL_PROFILE_EXCEPTION_H_ - -#include -#include - -namespace aql_profile { - -template class aql_profile_exception : public std::exception { - public: - aql_profile_exception(const std::string& m, const T& v) : msg(m), val(v) {} - virtual const char* what() const throw() { - std::ostringstream oss; - oss << msg << "(" << val << ")"; - return strdup(oss.str().c_str()); - } - - private: - std::string msg; - T val; -}; -} - -#endif // _AQL_PROFILE_EXCEPTION_H_ diff --git 
a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/logger.h b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/logger.h deleted file mode 100644 index 0034c04b2c..0000000000 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/logger.h +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef _LOGGER_H_ -#define _LOGGER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace aql_profile { - -class Logger { - public: - void msg(const std::string& m) { log(m); } - - void prn(const char* fmt, ...) { - const size_t formatted_size = 256; - char formatted_string[formatted_size]; - va_list argptr; - va_start(argptr, fmt); - vsnprintf(formatted_string, formatted_size, fmt, argptr); - va_end(argptr); - msg(formatted_string); - } - - template Logger& operator<<(const T& m) { - std::ostringstream oss; - oss << m; - if (!streaming) - log(oss.str()); - else - put(oss.str()); - streaming = true; - return *this; - } - - typedef void (*manip_t)(Logger&); - Logger& operator<<(manip_t f) { - f(*this); - return *this; - } - - static void endl(Logger& logger) { logger.streaming = false; } - - Logger() : file(NULL), dirty(false), streaming(false) { - const char* path = getenv("HSA_EXT_AQL_PROFILE_LOG"); - if (path != NULL) { - file = fopen("/tmp/aql_profile_log.txt", "a"); - } - } - ~Logger() { - if (file != NULL) { - if (dirty) put("\n"); - fclose(file); - } - } - - private: - void put(const std::string& m) { - if (file != NULL) { - dirty = true; - flock(fileno(file), LOCK_EX); - fprintf(file, "%s", m.c_str()); - fflush(file); - flock(fileno(file), LOCK_UN); - } - } - - void log(const std::string& m) { - const time_t rawtime = time(NULL); - const tm* tm_info = localtime(&rawtime); - char tm_str[26]; - strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", tm_info); - std::ostringstream oss; - oss << "\n<" << tm_str << std::dec << " pid" << syscall(__NR_getpid) << " tid" - << 
syscall(__NR_gettid) << "> " << m; - put(oss.str()); - } - - FILE* file; - bool dirty; - bool streaming; -}; - -} // aql_profile - -#define ERR_LOGGING(logger) \ - (logger << aql_profile::Logger::endl << "Error: " << __FUNCTION__ << "(): ") - -#endif // _LOGGER_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/pm4_factory.h b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/pm4_factory.h deleted file mode 100644 index 9402109ae3..0000000000 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/aqlprofile/pm4_factory.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef _PM4_FACTORY_H_ -#define _PM4_FACTORY_H_ - -#include -#include -#include -#include - -#include "aql_profile.h" -#include "gpu_block_info.h" -#include "aql_profile_exception.h" - -namespace pm4_profile { -class CommandWriter; -class Pmu; -class ThreadTrace; -extern GpuBlockInfo Gfx9HwBlocks[]; -extern const uint32_t Gfx9HwBlockCount; -extern GpuBlockInfo Gfx8HwBlocks[]; -extern const uint32_t Gfx8HwBlockCount; -} - -namespace aql_profile { - -class Pm4Factory { - public: - enum { kBadBlockId = UINT_MAX }; - - static Pm4Factory* Create(const hsa_ext_amd_aql_profile_profile_t* profile); - virtual pm4_profile::CommandWriter* getCommandWriter() = 0; - virtual pm4_profile::Pmu* getPmcMgr() = 0; - virtual pm4_profile::ThreadTrace* getSqttMgr() = 0; - - uint32_t getBlockId(const event_t* event) { - const hsa_ext_amd_aql_profile_block_name_t& block_name = event->block_name; - if (block_name >= tables.get_block_id_count()) - throw aql_profile_exception(std::string("Invalid block name, block_name"), - block_name); - return (block_name < tables.get_block_id_count()) - ? 
tables.get_block_id_ptr()[block_name] + event->block_index - : kBadBlockId; - } - const pm4_profile::GpuBlockInfo* getBlockInfo(const uint32_t& block_id) { - const pm4_profile::GpuBlockInfo* info = NULL; - if (block_id < tables.get_block_info_count()) { - info = tables.get_block_info_ptr() + block_id; - if (info->counterGroupId != block_id) - throw aql_profile_exception(std::string("Bad block id table, block_id"), - block_id); - } else - throw aql_profile_exception(std::string("Invalid block id, block_id"), block_id); - return info; - } - const pm4_profile::GpuBlockInfo* getBlockInfo(const event_t* event) { - const uint32_t block_id = getBlockId(event); - return getBlockInfo(block_id); - } - - protected: - class tables_t { - public: - tables_t(uint32_t* dp, uint32_t dc, pm4_profile::GpuBlockInfo* ip, uint32_t ic) - : block_id_ptr(dp), block_id_count(dc), block_info_ptr(ip), block_info_count(ic) {} - tables_t(const tables_t& t) - : block_id_ptr(t.block_id_ptr), - block_id_count(t.block_id_count), - block_info_ptr(t.block_info_ptr), - block_info_count(t.block_info_count) {} - tables_t() : block_id_ptr(0), block_id_count(0), block_info_ptr(0), block_info_count(0) {} - - uint32_t* get_block_id_ptr() { return block_id_ptr; } - uint32_t get_block_id_count() { return block_id_count; } - pm4_profile::GpuBlockInfo* get_block_info_ptr() { return block_info_ptr; } - uint32_t get_block_info_count() { return block_info_count; } - - private: - uint32_t* block_id_ptr; - uint32_t block_id_count; - pm4_profile::GpuBlockInfo* block_info_ptr; - uint32_t block_info_count; - }; - - Pm4Factory(const tables_t& t) { tables = t; } - - static tables_t tables; -}; - -class Gfx8Factory : public Pm4Factory { - public: - Gfx8Factory() - : Pm4Factory(tables_t(block_id_table, HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER, - pm4_profile::Gfx8HwBlocks, pm4_profile::Gfx8HwBlockCount)) {} - pm4_profile::CommandWriter* getCommandWriter(); - pm4_profile::Pmu* getPmcMgr(); - pm4_profile::ThreadTrace* getSqttMgr(); 
- - private: - static uint32_t block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER]; -}; - -class Gfx9Factory : public Pm4Factory { - public: - Gfx9Factory() - : Pm4Factory(tables_t(block_id_table, HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER, - pm4_profile::Gfx9HwBlocks, pm4_profile::Gfx9HwBlockCount)) {} - pm4_profile::CommandWriter* getCommandWriter(); - pm4_profile::Pmu* getPmcMgr(); - pm4_profile::ThreadTrace* getSqttMgr(); - - private: - static uint32_t block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER]; -}; - -inline Pm4Factory* Pm4Factory::Create(const hsa_ext_amd_aql_profile_profile_t* profile) { - Pm4Factory* instance = NULL; - char agent_name[64]; - hsa_agent_get_info(profile->agent, HSA_AGENT_INFO_NAME, agent_name); - - if (strncmp(agent_name, "gfx801", 6) == 0) { - throw aql_profile_exception(std::string("GFX8 Carrizo is not supported "), - agent_name); - } else if (strncmp(agent_name, "gfx8", 4) == 0) { - instance = new Gfx8Factory(); - } else if (strncmp(agent_name, "gfx9", 4) == 0) { - instance = new Gfx9Factory(); - } - - return instance; -} - -} // aql_profile - -#endif // _PM4_FACTORY_H_ diff --git a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/perf_counter.h b/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/perf_counter.h deleted file mode 100644 index dffdcce027..0000000000 --- a/projects/rocr-runtime/runtime/hsa-ext-aql-profile/src/perfcounter/perf_counter.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef _HSA_PERF_H_ -#define _HSA_PERF_H_ - -#include -#include -#include -#include - -namespace pm4_profile { -class DefaultCmdBuf; -class CommandWriter; - -typedef std::vector CountersVec; -typedef std::map CountersMap; - -class Pmu { - public: - // Enumeration of Pmu error codes - typedef enum ErrorCode { - // Generic PMU error - kErrorCodeNoError = 0x0, - - // Unknown CounterBlock ID - kErrorCodeUnknownCounterBlockId, - - // No CounterBlock exists - kErrorCodeNoCounterBlock, - - // The previously operation is not valid. 
This could be due to - // invalid transition from the current state. - kErrorCodeInvalidOperation, - - // PMU is not currently available (e.g. PMU is currently - // in-used by others) - kErrorCodeNotAvailable, - - // PMU is not currently available (e.g. PMU is currently - // in-used by others) - kErrorCodeErrorState, - - // PMU result is timeout - kErrorCodeTimeOut, - - // Max error count - kErrorCodeMax - } ErrorCode; - - // Destructor of PMU. - // note This stops the performance counters if running and releases - // any resources used by the PMU. - virtual ~Pmu() {} - - // Retrieve the last error code generated. This should be checked when - // values returned are NULL or void. - // Return an integer corresponding to the last error reported. - virtual int getLastError() = 0; - - // Given and error number reported from getLastError or returned from a - // function call, retreive the corresponding stl string. - // @param[in] error The error corresponding to a call to getLastError - // or a return code from a function call. - // Return An stl string representing a text corresponding to the error - // number. If invalid error code is given, the returned string is empty. - virtual std::string getErrorString(int error) = 0; - - // Start profiling on the PMU. - // @param[in] reset_counter indicates whether reset counter before - // recording. Default is reset counters. - // note This function must be implemented by children classes. - // Return true or false - // Possible error codes are: - // kErrorCodeInvalidOperation - // kErrorCodeNotAvailable - virtual void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, - const CountersMap& countersMap) = 0; - - // Stop profiling on the PMU. - // note This function must be called after \ref begin(). - // note This function must be implemented by children classes. 
- // Return true or false - // Possible error codes are: - // kErrorCodeInvalidOperation - virtual uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, - const CountersMap& countersMap, void* dataBuff) = 0; - - // Returns number of shader engines per block - // for the blocks featured shader engines instancing - virtual uint32_t getNumSe() = 0; - -}; // class Pmu -} // pm4_profile -#endif // _HSA_PERF_H_ diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h index 769dbed2e0..67acd50cd5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h @@ -51,12 +51,14 @@ namespace core { static const uint32_t HSA_EXT_FINALIZER_API_TABLE_ID = 0; static const uint32_t HSA_EXT_IMAGE_API_TABLE_ID = 1; + static const uint32_t HSA_EXT_AQLPROFILE_API_TABLE_ID = 2; ::HsaApiTable hsa_api; ::CoreApiTable core_api; ::AmdExtTable amd_ext_api; ::FinalizerExtTable finalizer_api; ::ImageExtTable image_api; + ::AqlProfileExtTable aqlprofile_api; HsaApiTable(); void Init(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h index 236a165c73..b86d435e75 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h @@ -65,10 +65,14 @@ class ExtensionEntryPoints { // Table of function pointers for Hsa Extension Finalizer FinalizerExtTable finalizer_api; + // Table of function pointers for Hsa Extension AqlProfiler + AqlProfileExtTable aqlprofile_api; + ExtensionEntryPoints(); bool LoadFinalizer(std::string library_name); bool LoadImage(std::string library_name); + bool LoadAqlProfileApi(std::string library_name); void Unload(); private: @@ -83,6 +87,9 @@ class ExtensionEntryPoints 
{ // Initialize table for HSA Image Extension Api's void InitImageExtTable(); + // Initialize table for HSA AqlProfile Extension Api's + void InitAqlProfileExtTable(); + // Initialize Amd Ext table for Api related to Images void InitAmdExtTable(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp index 64129ac03f..04217e21c5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp @@ -70,6 +70,7 @@ #include "core/inc/interrupt_signal.h" #include "core/inc/amd_loader_context.hpp" #include "inc/hsa_ven_amd_loader.h" +#include "inc/hsa_ven_amd_aqlprofile.h" #include "core/inc/hsa_ext_amd_impl.h" using namespace amd::hsa; @@ -230,6 +231,9 @@ hsa_status_t hsa_extension_get_name(uint16_t extension, const char** name) { case HSA_EXTENSION_AMD_LOADER: *name = "HSA_EXTENSION_AMD_LOADER"; break; + case HSA_EXTENSION_AMD_AQLPROFILE: + *name = "HSA_EXTENSION_AMD_AQLPROFILE"; + break; default: *name = "HSA_EXTENSION_INVALID"; return HSA_STATUS_ERROR_INVALID_ARGUMENT; @@ -293,6 +297,12 @@ hsa_status_t hsa_system_major_extension_supported(uint16_t extension, uint16_t v return HSA_STATUS_SUCCESS; } + if ((extension == HSA_EXTENSION_AMD_AQLPROFILE) && (version_major == 1)) { + *version_minor = 0; + *result = true; + return HSA_STATUS_SUCCESS; + } + *result = false; return HSA_STATUS_SUCCESS; } @@ -306,7 +316,8 @@ static size_t get_extension_table_length(uint16_t extension, uint16_t major, uin static sizes_t sizes[] = { {"hsa_ext_images_1_00_pfn_t", sizeof(hsa_ext_images_1_00_pfn_t)}, {"hsa_ext_finalizer_1_00_pfn_t", sizeof(hsa_ext_finalizer_1_00_pfn_t)}, - {"hsa_ven_amd_loader_1_00_pfn_t", sizeof(hsa_ven_amd_loader_1_00_pfn_t)}}; + {"hsa_ven_amd_loader_1_00_pfn_t", sizeof(hsa_ven_amd_loader_1_00_pfn_t)}, + {"hsa_ven_amd_aqlprofile_1_00_pfn_t", sizeof(hsa_ven_amd_aqlprofile_1_00_pfn_t)}}; static const 
size_t num_tables = sizeof(sizes) / sizeof(sizes_t); if (minor > 99) return 0; @@ -332,6 +343,9 @@ static size_t get_extension_table_length(uint16_t extension, uint16_t major, uin case HSA_EXTENSION_AMD_LOADER: name = "hsa_ven_amd_loader_"; break; + case HSA_EXTENSION_AMD_AQLPROFILE: + name = "hsa_ven_amd_aqlprofile_"; + break; default: return 0; } @@ -361,7 +375,11 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v if (table_length == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT; if (extension == HSA_EXTENSION_IMAGES) { - if (version_major > 1) return HSA_STATUS_ERROR; + if (version_major != + core::Runtime::runtime_singleton_->extensions_.image_api.version.major_id) { + return HSA_STATUS_ERROR; + } + hsa_ext_images_1_pfn_t ext_table; ext_table.hsa_ext_image_clear = hsa_ext_image_clear; ext_table.hsa_ext_image_copy = hsa_ext_image_copy; @@ -383,7 +401,11 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v } if (extension == HSA_EXTENSION_FINALIZER) { - if (version_major > 1) return HSA_STATUS_ERROR; + if (version_major != + core::Runtime::runtime_singleton_->extensions_.finalizer_api.version.major_id) { + return HSA_STATUS_ERROR; + } + hsa_ext_finalizer_1_00_pfn_t ext_table; ext_table.hsa_ext_program_add_module = hsa_ext_program_add_module; ext_table.hsa_ext_program_create = hsa_ext_program_create; @@ -410,6 +432,26 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v return HSA_STATUS_SUCCESS; } + if (extension == HSA_EXTENSION_AMD_AQLPROFILE) { + if (version_major != + core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.version.major_id) { + return HSA_STATUS_ERROR; + } + + hsa_ven_amd_aqlprofile_1_00_pfn_t ext_table; + ext_table.hsa_ven_amd_aqlprofile_error_string = hsa_ven_amd_aqlprofile_error_string; + ext_table.hsa_ven_amd_aqlprofile_validate_event = hsa_ven_amd_aqlprofile_validate_event; + ext_table.hsa_ven_amd_aqlprofile_start = 
hsa_ven_amd_aqlprofile_start; + ext_table.hsa_ven_amd_aqlprofile_stop = hsa_ven_amd_aqlprofile_stop; + ext_table.hsa_ven_amd_aqlprofile_legacy_get_pm4 = hsa_ven_amd_aqlprofile_legacy_get_pm4; + ext_table.hsa_ven_amd_aqlprofile_get_info = hsa_ven_amd_aqlprofile_get_info; + ext_table.hsa_ven_amd_aqlprofile_iterate_data = hsa_ven_amd_aqlprofile_iterate_data; + + memcpy(table, &ext_table, Min(sizeof(ext_table), table_length)); + + return HSA_STATUS_SUCCESS; + } + return HSA_STATUS_ERROR; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp index 39dc71fb50..ae6f0ca8f1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp @@ -42,6 +42,8 @@ #include "core/inc/hsa_ext_interface.h" +#include + #include "core/inc/runtime.h" namespace core { @@ -169,6 +171,7 @@ static T0 hsa_amd_null(T1, T2, T3, T4, T5, T6) { ExtensionEntryPoints::ExtensionEntryPoints() { InitFinalizerExtTable(); InitImageExtTable(); + InitAqlProfileExtTable(); InitAmdExtTable(); } @@ -212,6 +215,21 @@ void ExtensionEntryPoints::InitImageExtTable() { image_api.hsa_ext_image_create_with_layout_fn = hsa_ext_null; } +void ExtensionEntryPoints::InitAqlProfileExtTable() { + // Initialize Version of Api Table + aqlprofile_api.version.major_id = 0x00; + aqlprofile_api.version.minor_id = 0x00; + aqlprofile_api.version.step_id = 0x00; + + aqlprofile_api.hsa_ven_amd_aqlprofile_error_string_fn = hsa_ext_null; + aqlprofile_api.hsa_ven_amd_aqlprofile_validate_event_fn = hsa_ext_null; + aqlprofile_api.hsa_ven_amd_aqlprofile_start_fn = hsa_ext_null; + aqlprofile_api.hsa_ven_amd_aqlprofile_stop_fn = hsa_ext_null; + aqlprofile_api.hsa_ven_amd_aqlprofile_legacy_get_pm4_fn = hsa_ext_null; + aqlprofile_api.hsa_ven_amd_aqlprofile_get_info_fn = hsa_ext_null; + 
aqlprofile_api.hsa_ven_amd_aqlprofile_iterate_data_fn = hsa_ext_null; +} + // Initialize Amd Ext table for Api related to Images void ExtensionEntryPoints::InitAmdExtTable() { hsa_api_table_.amd_ext_api.hsa_amd_image_create_fn = hsa_ext_null; @@ -485,6 +503,81 @@ bool ExtensionEntryPoints::LoadFinalizer(std::string library_name) { return true; } +bool ExtensionEntryPoints::LoadAqlProfileApi(std::string library_name) { + os::LibHandle lib = os::LoadLib(library_name); + if (lib == NULL) { + return false; + } + libs_.push_back(lib); + + void* ptr; + + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_error_string"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_error_string_fn == + (decltype(::hsa_ven_amd_aqlprofile_error_string)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_error_string_fn = (decltype(::hsa_ven_amd_aqlprofile_error_string)*)ptr; + } + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_validate_event"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_validate_event_fn == + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_validate_event_fn = (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)ptr; + } + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_start"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_start_fn == + (decltype(::hsa_ven_amd_aqlprofile_start)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_start_fn = (decltype(::hsa_ven_amd_aqlprofile_start)*)ptr; + } + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_stop"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_stop_fn == + (decltype(::hsa_ven_amd_aqlprofile_stop)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_stop_fn = 
(decltype(::hsa_ven_amd_aqlprofile_stop)*)ptr; + } + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_legacy_get_pm4_fn == + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_legacy_get_pm4_fn = (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)ptr; + } + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_get_info"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_get_info_fn == + (decltype(::hsa_ven_amd_aqlprofile_get_info)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_get_info_fn = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)ptr; + } + ptr = os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_iterate_data"); + if (ptr != NULL) { + assert(aqlprofile_api.hsa_ven_amd_aqlprofile_iterate_data_fn == + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)hsa_ext_null && + "Duplicate load of extension import."); + aqlprofile_api.hsa_ven_amd_aqlprofile_iterate_data_fn = (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)ptr; + } + + // Initialize Version of Api Table + aqlprofile_api.version.major_id = HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION; + aqlprofile_api.version.minor_id = sizeof(::AqlProfileExtTable); + aqlprofile_api.version.step_id = HSA_AQLPROFILE_API_TABLE_STEP_VERSION; + + // Update handle of table of HSA extensions + hsa_internal_api_table_.CloneExts(&aqlprofile_api, + core::HsaApiTable::HSA_EXT_AQLPROFILE_API_TABLE_ID); + + ptr = os::GetExportAddress(lib, "Load"); + if (ptr != NULL) { + ((Load_t)ptr)(&core::hsa_internal_api_table_.hsa_api); + } + + return true; +} } // namespace core @@ -655,6 +748,57 @@ hsa_status_t hsa_ext_image_create_with_layout( image); } +hsa_status_t hsa_ven_amd_aqlprofile_error_string( + const char** str) // [out] pointer on the error string +{ + return 
core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_error_string_fn(str); +} + +hsa_status_t hsa_ven_amd_aqlprofile_validate_event( + hsa_agent_t agent, // HSA handle for the profiling GPU + const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer on validated event + bool* result) // [out] True if the event valid, False otherwise +{ + return core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_validate_event_fn(agent, event, result); +} + +hsa_status_t hsa_ven_amd_aqlprofile_start( + const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile contex object + hsa_ext_amd_aql_pm4_packet_t* aql_start_packet) // [out] profile start AQL packet +{ + return core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_start_fn(profile, aql_start_packet); +} + +hsa_status_t hsa_ven_amd_aqlprofile_stop( + const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile contex object + hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet) // [out] profile stop AQL packet +{ + return core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_stop_fn(profile, aql_stop_packet); +} + +hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4( + const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet + void* data) // [out] PM4 packet blob +{ + return core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_legacy_get_pm4_fn(aql_packet, data); +} + +hsa_status_t hsa_ven_amd_aqlprofile_get_info( + const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object + hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute + void* value) // [in/out] returned value +{ + return core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_get_info_fn(profile, attribute, value); +} + +hsa_status_t hsa_ven_amd_aqlprofile_iterate_data( + const hsa_ven_amd_aqlprofile_profile_t* 
profile, // [in] profile context object + hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data + void* data) // [in/out] data passed to the callback +{ + return core::Runtime::runtime_singleton_->extensions_.aqlprofile_api.hsa_ven_amd_aqlprofile_iterate_data_fn(profile, callback, data); +} + //---------------------------------------------------------------------------// // Stubs for internal extension functions //---------------------------------------------------------------------------// diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 442fe82f52..181c4b110d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -1038,11 +1038,15 @@ void Runtime::LoadExtensions() { "libhsa-ext-finalize64.so.1"}; static const std::string kImageLib[] = {"hsa-ext-image64.dll", "libhsa-ext-image64.so.1"}; + static const std::string kAqlProfileLib[] = {"hsa-amd-aqlprofile64.dll", + "libhsa-amd-aqlprofile64.so.1"}; #else static const std::string kFinalizerLib[] = {"hsa-ext-finalize.dll", "libhsa-ext-finalize.so.1"}; static const std::string kImageLib[] = {"hsa-ext-image.dll", "libhsa-ext-image.so.1"}; + static const std::string kAqlProfileLib[] = {"hsa-amd-aqlprofile.dll", + "libhsa-amd-aqlprofile.so.1"}; #endif // Update Hsa Api Table with handle of Image extension Apis @@ -1054,6 +1058,9 @@ void Runtime::LoadExtensions() { extensions_.LoadImage(kImageLib[os_index(os::current_os)]); hsa_api_table_.LinkExts(&extensions_.image_api, core::HsaApiTable::HSA_EXT_IMAGE_API_TABLE_ID); + + // Update Hsa Api Table with handle of AqlProfile extension Apis + extensions_.LoadAqlProfileApi(kAqlProfileLib[os_index(os::current_os)]); } void Runtime::UnloadExtensions() { extensions_.Unload(); } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h 
b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h index f7d15ba7e5..b8ec90a667 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h @@ -535,10 +535,14 @@ typedef enum { * Loader extension. */ HSA_EXTENSION_AMD_LOADER = 0x201, + /** + * AqlProfile extension. + */ + HSA_EXTENSION_AMD_AQLPROFILE = 0x202, /** * Last AMD extension. */ - HSA_AMD_LAST_EXTENSION = 0x201 + HSA_AMD_LAST_EXTENSION = 0x202 } hsa_extension_t; /** diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h index 5ecdf7bb09..e74ec30d09 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -48,10 +48,12 @@ #include "hsa_ext_image.h" #include "hsa_ext_amd.h" #include "hsa_ext_finalize.h" +#include "hsa_ven_amd_aqlprofile.h" #else #include "inc/hsa_ext_image.h" #include "inc/hsa_ext_amd.h" #include "inc/hsa_ext_finalize.h" +#include "inc/hsa_ven_amd_aqlprofile.h" #endif #include @@ -64,6 +66,7 @@ #define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x01 #define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x01 #define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x01 +#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01 // Step Ids of the Api tables exported by Hsa Core Runtime #define HSA_API_TABLE_STEP_VERSION 0x00 @@ -71,6 +74,7 @@ #define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x00 #define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00 #define HSA_IMAGE_API_TABLE_STEP_VERSION 0x00 +#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00 // Min function used to copy Api Tables static inline uint32_t Min(const uint32_t a, const uint32_t b) { @@ -116,6 +120,18 @@ struct ImageExtTable { decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn; }; +// Table to export HSA AqlProfile AMD specific Extension Apis +struct AqlProfileExtTable { + ApiTableVersion version; + 
decltype(hsa_ven_amd_aqlprofile_error_string)* hsa_ven_amd_aqlprofile_error_string_fn; + decltype(hsa_ven_amd_aqlprofile_validate_event)* hsa_ven_amd_aqlprofile_validate_event_fn; + decltype(hsa_ven_amd_aqlprofile_start)* hsa_ven_amd_aqlprofile_start_fn; + decltype(hsa_ven_amd_aqlprofile_stop)* hsa_ven_amd_aqlprofile_stop_fn; + decltype(hsa_ven_amd_aqlprofile_legacy_get_pm4)* hsa_ven_amd_aqlprofile_legacy_get_pm4_fn; + decltype(hsa_ven_amd_aqlprofile_get_info)* hsa_ven_amd_aqlprofile_get_info_fn; + decltype(hsa_ven_amd_aqlprofile_iterate_data)* hsa_ven_amd_aqlprofile_iterate_data_fn; +}; + // Table to export AMD Extension Apis struct AmdExtTable { ApiTableVersion version; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ven_amd_aqlprofile.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ven_amd_aqlprofile.h new file mode 100644 index 0000000000..784df62c2f --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ven_amd_aqlprofile.h @@ -0,0 +1,304 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2017 ADVANCED MICRO DEVICES, INC. +// +// AMD is granting you permission to use this software and documentation(if any) +// (collectively, the "Materials") pursuant to the terms and conditions of the +// Software License Agreement included with the Materials.If you do not have a +// copy of the Software License Agreement, contact your AMD representative for a +// copy. +// +// You agree that you will not reverse engineer or decompile the Materials, in +// whole or in part, except as allowed by applicable law. 
+// +// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY, +// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE +// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM +// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF +// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion +// of implied warranties, so the above exclusion may not apply to You. +// +// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT, +// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT, +// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF +// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total +// liability to You for all damages, losses, and causes of action (whether in +// contract, tort (including negligence) or otherwise) exceed the amount of $100 +// USD. You agree to defend, indemnify and hold harmless AMD and its licensors, +// and any of their directors, officers, employees, affiliates or agents from +// and against any and all loss, damage, liability and other expenses (including +// reasonable attorneys' fees), resulting from Your use of the Software or +// violation of the terms and conditions of this Agreement. +// +// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with +// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is +// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 - +// 7013, et seq., or its successor.Use of the Materials by the Government +// constitutes acknowledgement of AMD's proprietary rights in them. 
+// +// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as +// stated in the Software License Agreement. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_ +#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/////////////////////////////////////////////////////////////////////// +// Library API: +// The library provides helper methods for instantiation of +// the profile context object and for populating of the start +// and stop AQL packets. The profile object contains a profiling +// events list and needed for profiling buffers descriptors, +// a command buffer and an output data buffer. To check if there +// was an error the library methods return a status code. Also +// the library provides methods for querying required buffers +// attributes, to validate the event attributes and to get profiling +// output data. +// +// Returned status: +// hsa_status_t – HSA status codes are used from hsa.h header +// +// Supported profiling features: +// +// Supported profiling events +typedef enum { + HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, + HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT +} hsa_ven_amd_aqlprofile_event_type_t; + +// Supported performance counters (PMC) blocks +// The block ID is the same for a block instances set, for example +// each block instance from the TCC block set, TCC0, TCC1, …, TCCN +// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC.
+typedef enum { + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CB, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_DB, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_PASU, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_PASC, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQES, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQGS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQVS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQPS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQLS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQHS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_VGT, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_IA, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MC, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCS, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_WD, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPG, + HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, + HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER +} hsa_ven_amd_aqlprofile_block_name_t; + +// PMC event object structure +// ‘counter_id’ value is specified in GFXIPs perfcounter user guides +// which is the counters select value, “Performance Counters Selection” +// chapter. 
+typedef struct { + hsa_ven_amd_aqlprofile_block_name_t block_name; + uint32_t block_index; + uint32_t counter_id; +} hsa_ven_amd_aqlprofile_event_t; + +// Check if event is valid for the specific GPU +hsa_status_t hsa_ven_amd_aqlprofile_validate_event( + hsa_agent_t agent, // HSA handle for the profiling GPU + const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer to the event to validate + bool* result); // [out] True if the event is valid, False otherwise + +// Profiling parameters +// All parameters are generic and if not applicable for a specific +// profile configuration then error status will be returned. +typedef enum { + // SQTT applicable parameters + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET, + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK, + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK, + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK, + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 +} hsa_ven_amd_aqlprofile_parameter_name_t; + +// Profile parameter object +typedef struct { + hsa_ven_amd_aqlprofile_parameter_name_t parameter_name; + uint32_t value; +} hsa_ven_amd_aqlprofile_parameter_t; + +// +// Profile context object: +// The library provides a profile object structure which contains +// the events array, a buffer for the profiling start/stop commands +// and a buffer for the output data. +// The buffers are specified by the buffer descriptors and allocated +// by the application. The buffers allocation attributes, the command +// buffer size, the PMC output buffer size as well as profiling output +// data can be obtained using the generic get profile info helper _get_info.
+// +// Buffer descriptor +typedef struct { + void* ptr; + uint32_t size; +} hsa_ven_amd_aqlprofile_descriptor_t; + +// Profile context object structure, contains profiling events list and +// needed for profiling buffers descriptors, a command buffer and +// an output data buffer +typedef struct { + hsa_agent_t agent; // GFXIP handle + hsa_ven_amd_aqlprofile_event_type_t type; // Events type + const hsa_ven_amd_aqlprofile_event_t* events; // Events array + uint32_t event_count; // Events count + const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array + uint32_t parameter_count; // Parameters count + hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer + hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands +} hsa_ven_amd_aqlprofile_profile_t; + +// +// AQL packets populating methods: +// The helper methods to populate provided by the application START and +// STOP AQL packets which the application is required to submit before and +// after profiled GPU task packets respectively. 
+// +// AQL Vendor Specific packet which carries a PM4 command +typedef struct { + uint16_t header; + uint16_t pm4_command[27]; + hsa_signal_t completion_signal; +} hsa_ext_amd_aql_pm4_packet_t; + +// Method to populate the provided AQL packet with profiling start commands +// Only 'pm4_command' fields of the packet are set and the application +// is responsible to set Vendor Specific header type and a completion signal +hsa_status_t hsa_ven_amd_aqlprofile_start( + const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object + hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet + +// Method to populate the provided AQL packet with profiling stop commands +// Only 'pm4_command' fields of the packet are set and the application +// is responsible to set Vendor Specific header type and a completion signal +hsa_status_t hsa_ven_amd_aqlprofile_stop( + const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object + hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet + +// Legacy devices, PM4 profiling packet size +const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192; +// Legacy devices, converting the profiling AQL packet to PM4 packet blob +hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4( + const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet + void* data); // [out] PM4 packet blob + +// +// Get profile info: +// Generic method for getting various profile info including profile buffers +// attributes like the command buffer size and the profiling PMC results. +// It’s implied that all counters are 64bit values.
+//
+// Profile generic output data:
+// NOTE(review): which union member is meaningful presumably depends on the
+// info type passed alongside this object (PMC or SQTT) - confirm.
+typedef struct {
+  uint32_t sample_id;  // PMC sample or SQTT buffer index (original said "of"; confirm)
+  union {
+    struct {
+      hsa_ven_amd_aqlprofile_event_t event;  // PMC event
+      uint64_t result;                       // PMC result
+    } pmc_data;
+    hsa_ven_amd_aqlprofile_descriptor_t sqtt_data;  // SQTT output data descriptor
+  };
+} hsa_ven_amd_aqlprofile_info_data_t;
+
+// Profile attributes (enumerator values are implicit, starting at 0)
+typedef enum {
+  HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE,  // get_info returns uint32_t value
+  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE,        // get_info returns uint32_t value
+  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA,             // get_info returns PMC uint64_t value
+                                                    // in info_data object
+  HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA             // get_info returns SQTT buffer ptr/size
+                                                    // in info_data object
+} hsa_ven_amd_aqlprofile_info_type_t;
+
+// Definition of the output data iterator callback
+typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
+    hsa_ven_amd_aqlprofile_info_type_t info_type,   // [in] data type, PMC or SQTT data
+    hsa_ven_amd_aqlprofile_info_data_t* info_data,  // [in] info_data object
+    void* callback_data);                           // [in/out] data passed to the callback
+
+// Method for getting the profile info
+// 'value' is untyped; the type written for each attribute is listed in the
+// comments of hsa_ven_amd_aqlprofile_info_type_t
+hsa_status_t hsa_ven_amd_aqlprofile_get_info(
+    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
+    hsa_ven_amd_aqlprofile_info_type_t attribute,     // [in] requested profile attribute
+    void* value);                                     // [in/out] returned value
+
+// Method for iterating the events output data
+hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
+    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
+    hsa_ven_amd_aqlprofile_data_callback_t callback,  // [in] callback to iterate the output data
+    void* data);                                      // [in/out] data passed to the callback
+
+// Return error string
+hsa_status_t hsa_ven_amd_aqlprofile_error_string(
+    const char** str);  // [out] pointer to the error string
+
+/**
+ * @brief Extension version.
+ */
+// NOTE(review): the leading zeros make this an octal literal in C/C++
+// (numeric value 512). It presumably encodes version 1.00, matching the
+// '1_00' in the function table name below - confirm before comparing it
+// numerically.
+#define hsa_ven_amd_aqlprofile 001000
+
+/**
+ * @brief Extension function table.
+ *
+ * Each member is a function pointer named after an API function of this
+ * extension. The packet parameter names follow the corresponding prototypes.
+ */
+typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
+  hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(const char** str);
+
+  hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(hsa_agent_t agent,
+                                                        const hsa_ven_amd_aqlprofile_event_t* event,
+                                                        bool* result);
+
+  hsa_status_t (*hsa_ven_amd_aqlprofile_start)(const hsa_ven_amd_aqlprofile_profile_t* profile,
+                                               hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
+
+  hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(const hsa_ven_amd_aqlprofile_profile_t* profile,
+                                              hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
+
+  hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
+      const hsa_ext_amd_aql_pm4_packet_t* aql_packet, void* data);
+
+  hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(const hsa_ven_amd_aqlprofile_profile_t* profile,
+                                                  hsa_ven_amd_aqlprofile_info_type_t attribute,
+                                                  void* value);
+
+  hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
+      const hsa_ven_amd_aqlprofile_profile_t* profile,
+      hsa_ven_amd_aqlprofile_data_callback_t callback, void* data);
+} hsa_ven_amd_aqlprofile_1_00_pfn_t;
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+
+#endif  // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_