diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/table_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/table_id.h index 589839496b..71e51da149 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/table_id.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/table_id.h @@ -31,5 +31,6 @@ typedef enum ROCPROFILER_HSA_TABLE_ID_ImageExt, ROCPROFILER_HSA_TABLE_ID_FinalizeExt, ROCPROFILER_HSA_TABLE_ID_AmdTool, + ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, ROCPROFILER_HSA_TABLE_ID_LAST, } rocprofiler_hsa_table_id_t; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/CMakeLists.txt index 74a19cd342..dcaacd9b3c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/CMakeLists.txt @@ -6,6 +6,7 @@ set(ROCPROFILER_LIB_HSA_SOURCES code_object.cpp hsa_barrier.cpp hsa.cpp + pc_sampling.cpp profile_serializer.cpp queue_controller.cpp queue.cpp @@ -19,6 +20,7 @@ set(ROCPROFILER_LIB_HSA_HEADERS defines.hpp hsa_barrier.hpp hsa.hpp + pc_sampling.hpp profile_serializer.hpp queue_controller.hpp queue.hpp diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp index d29fbd2de3..c41547162d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp @@ -39,6 +39,11 @@ using this_type = hsa_api_meta; \ using function_type = hsa_api_func::function_type; \ \ + static constexpr auto offset() \ + { \ + return offsetof(hsa_table_lookup::type, HSA_FUNC_PTR); \ + } \ + \ template \ static auto& get_table(TableT& _v) \ { \ diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp 
b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp index c31249f245..bb7bb23af4 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp @@ -27,6 +27,7 @@ #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/hsa/details/ostream.hpp" +#include "lib/rocprofiler-sdk/hsa/pc_sampling.hpp" #include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp" #include "lib/rocprofiler-sdk/hsa/types.hpp" #include "lib/rocprofiler-sdk/hsa/utils.hpp" @@ -108,6 +109,10 @@ DEFINE_TABLE_VERSION(fini_ext, FINALIZER_API) DEFINE_TABLE_VERSION(img_ext, IMAGE_API) DEFINE_TABLE_VERSION(amd_tool, TOOLS_API) +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 +DEFINE_TABLE_VERSION(pc_sampling_ext, PC_SAMPLING_API) +#endif + #undef DEFINE_TABLE_VERSION #undef DEFINE_TABLE_VERSION_IMPL @@ -163,6 +168,17 @@ get_tracing_amd_tool_table() // table is never traced return val; } +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + +hsa_pc_sampling_ext_table_t* +get_tracing_pc_sampling_ext_table() // table is never traced +{ + static auto*& val = GET_TABLE_IMPL(pc_sampling_ext, tracing_table); + return val; +} + +#endif + hsa_table_version_t get_table_version() { @@ -204,17 +220,38 @@ get_amd_tool_table() return val; } +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + +hsa_pc_sampling_ext_table_t* +get_pc_sampling_ext_table() +{ + static auto*& val = GET_TABLE_IMPL(pc_sampling_ext, internal_table); + return val; +} + +#endif + #undef GET_TABLE_IMPL hsa_api_table_t& get_table() { +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + static auto tbl = hsa_api_table_t{.version = hsa_api_table_version, + .core_ = get_core_table(), + .amd_ext_ = get_amd_ext_table(), + .finalizer_ext_ = get_fini_ext_table(), + .image_ext_ = get_img_ext_table(), + .tools_ = get_amd_tool_table(), + .pc_sampling_ext_ = get_pc_sampling_ext_table()}; +#else static auto tbl = hsa_api_table_t{.version = 
hsa_api_table_version, .core_ = get_core_table(), .amd_ext_ = get_amd_ext_table(), .finalizer_ext_ = get_fini_ext_table(), .image_ext_ = get_img_ext_table(), .tools_ = get_amd_tool_table()}; +#endif return tbl; } @@ -725,6 +762,16 @@ update_table(hsa_amd_tool_table_t* _tbl, uint64_t _instv) scratch_memory::update_table(_tbl, _instv); } +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + +template <> +void +copy_table(hsa_pc_sampling_ext_table_t* _tbl, uint64_t _instv) +{ + pc_sampling::copy_table(_tbl, _instv); +} + +#endif #undef INSTANTIATE_HSA_TABLE_FUNC } // namespace hsa } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp index a5f27ea6cd..502b16874f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp @@ -37,6 +37,12 @@ HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_TABLE_ID_ImageExt, ::ImageExtTab HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_TABLE_ID_FinalizeExt, ::FinalizerExtTable, fini_ext) HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_TABLE_ID_AmdTool, ::ToolsApiTable, amd_tool) +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 +HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + ::PcSamplingExtTable, + pc_sampling_ext) +#endif + namespace rocprofiler { namespace hsa @@ -113,6 +119,18 @@ struct hsa_domain_info static constexpr auto external_correlation_id_domain_idx = ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE; }; + +template <> +struct hsa_domain_info +: hsa_domain_info +{ + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_NONE; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_NONE; + static constexpr auto none = 0; + static constexpr auto last = 0; + static constexpr auto external_correlation_id_domain_idx = + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE; +}; } // 
namespace hsa } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp index 5e001effab..81cae9d6f1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp @@ -22,13 +22,36 @@ #pragma once -#include #include +#include + #include #include #include +#if defined(HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION) && \ + HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION > 0x0 +# define ROCPROFILER_SDK_HSA_PC_SAMPLING 1 +#else +# define ROCPROFILER_SDK_HSA_PC_SAMPLING 0 +#endif + +// redundant check based on whether the pc sampling API header was found +#if defined __has_include +# if __has_include() +# if ROCPROFILER_SDK_HSA_PC_SAMPLING == 0 +# error \ + "rocprofiler-sdk disabled the HSA PC sampling table even though the hsa_ven_amd_pc_sampling.h was found" +# endif +# else +# if ROCPROFILER_SDK_HSA_PC_SAMPLING == 1 +# error \ + "rocprofiler-sdk enabled the HSA PC sampling table even though the hsa_ven_amd_pc_sampling.h was not found" +# endif +# endif +#endif + namespace rocprofiler { namespace hsa @@ -46,6 +69,9 @@ using hsa_amd_ext_table_t = ::AmdExtTable; using hsa_fini_ext_table_t = ::FinalizerExtTable; using hsa_img_ext_table_t = ::ImageExtTable; using hsa_amd_tool_table_t = ::ToolsApiTable; +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 +using hsa_pc_sampling_ext_table_t = ::PcSamplingExtTable; +#endif hsa_api_table_t& get_table(); @@ -68,6 +94,11 @@ get_img_ext_table(); hsa_amd_tool_table_t* get_amd_tool_table(); +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 +hsa_pc_sampling_ext_table_t* +get_pc_sampling_ext_table(); +#endif + hsa_core_table_t* get_tracing_core_table(); @@ -83,6 +114,11 @@ get_tracing_img_ext_table(); hsa_amd_tool_table_t* get_tracing_amd_tool_table(); +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 +hsa_pc_sampling_ext_table_t* +get_tracing_pc_sampling_ext_table(); +#endif + 
template struct hsa_table_lookup; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/pc_sampling.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/pc_sampling.cpp new file mode 100644 index 0000000000..7992edabed --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/pc_sampling.cpp @@ -0,0 +1,208 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "lib/rocprofiler-sdk/hsa/pc_sampling.hpp" + +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + +# include "lib/common/defines.hpp" +# include "lib/common/utility.hpp" +# include "lib/rocprofiler-sdk/buffer.hpp" +# include "lib/rocprofiler-sdk/context/context.hpp" +# include "lib/rocprofiler-sdk/hsa/defines.hpp" +# include "lib/rocprofiler-sdk/hsa/hsa.hpp" +# include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" + +# include +# include +# include +# include + +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include + +HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + ::PcSamplingExtTable, + pc_sampling_ext) + +namespace rocprofiler +{ +namespace hsa +{ +namespace +{ +enum pc_sampling_event_kind +{ + hsa_ven_amd_pcs_id_none = 0, + hsa_ven_amd_pcs_id_iterate_configuration, + hsa_ven_amd_pcs_id_create, + hsa_ven_amd_pcs_id_create_from_id, + hsa_ven_amd_pcs_id_destroy, + hsa_ven_amd_pcs_id_start, + hsa_ven_amd_pcs_id_stop, + hsa_ven_amd_pcs_id_flush, + hsa_ven_amd_pcs_id_last, +}; +} // namespace +} // namespace hsa +} // namespace rocprofiler + +HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_iterate_configuration, + hsa_ven_amd_pcs_iterate_configuration, + hsa_ven_amd_pcs_iterate_configuration_fn); + +HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_create, + hsa_ven_amd_pcs_create, + hsa_ven_amd_pcs_create_fn); + +HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_create_from_id, + hsa_ven_amd_pcs_create_from_id, + hsa_ven_amd_pcs_create_from_id_fn); + +HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_destroy, + hsa_ven_amd_pcs_destroy, + hsa_ven_amd_pcs_destroy_fn); + +HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_start, + hsa_ven_amd_pcs_start, + hsa_ven_amd_pcs_start_fn); + 
+HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_stop, + hsa_ven_amd_pcs_stop, + hsa_ven_amd_pcs_stop_fn); + +HSA_API_META_DEFINITION(ROCPROFILER_HSA_TABLE_ID_PcSamplingExt, + hsa_ven_amd_pcs_id_flush, + hsa_ven_amd_pcs_flush, + hsa_ven_amd_pcs_flush_fn); + +namespace rocprofiler +{ +namespace hsa +{ +namespace pc_sampling +{ +namespace +{ +template +void +copy_table(hsa_pc_sampling_ext_table_t* _orig, uint64_t _tbl_instance) +{ + using table_type = typename hsa_table_lookup::type; + + static_assert(std::is_same::value); + + if constexpr(OpIdx > hsa_ven_amd_pcs_id_none) + { + auto _info = hsa_api_meta{}; + + if(_info.offset() >= _orig->version.minor_id) return; // check before forming any reference into _orig: entry offset is outside the size reported in minor_id + + auto& _orig_table = _info.get_table(_orig); + auto& _orig_func = _info.get_table_func(_orig_table); + + auto& _copy_table = _info.get_table(hsa_table_lookup{}(LookupT{})); + auto& _copy_func = _info.get_table_func(_copy_table); + + LOG_IF(FATAL, _copy_func && _tbl_instance == 0) + << _info.name << " has non-null function pointer " << _copy_func + << " despite this being the first instance of the library being copied"; + + if(!_copy_func) + { + LOG(INFO) << "copying table entry for " << _info.name; + _copy_func = _orig_func; + } + else + { + LOG(INFO) << "skipping copying table entry for " << _info.name + << " from table instance " << _tbl_instance; + } + } +} + +template +void +copy_table(hsa_pc_sampling_ext_table_t* _orig, + uint64_t _tbl_instance, + std::index_sequence) +{ + static_assert( + std::is_same::type>::value, + "unexpected type"); + + (copy_table(_orig, _tbl_instance), ...); +} + +} // namespace + +void +copy_table(hsa_pc_sampling_ext_table_t* _orig, uint64_t _tbl_instance) +{ + if(_orig) + copy_table( + _orig, _tbl_instance, std::make_index_sequence{}); +} + +void +update_table(hsa_pc_sampling_ext_table_t* /*_orig*/, uint64_t /*_tbl_instance*/) +{} +} // namespace pc_sampling +} // namespace hsa +} // namespace rocprofiler + +#else + +namespace 
rocprofiler +{ +namespace hsa +{ +namespace pc_sampling +{ +const char* name_by_id(uint32_t) { return nullptr; } + +std::vector +get_ids() +{ + return std::vector{}; +} +} // namespace pc_sampling +} // namespace hsa +} // namespace rocprofiler + +#endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/pc_sampling.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/pc_sampling.hpp new file mode 100644 index 0000000000..c3060495d1 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/pc_sampling.hpp @@ -0,0 +1,55 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" + +#include + +#include +#include + +namespace rocprofiler +{ +namespace hsa +{ +namespace pc_sampling +{ +const char* +name_by_id(uint32_t id); + +std::vector +get_ids(); + +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + +void +copy_table(hsa_pc_sampling_ext_table_t* _orig, uint64_t lib_instance); + +void +update_table(hsa_pc_sampling_ext_table_t* _orig, uint64_t lib_instance); + +#endif +} // namespace pc_sampling +} // namespace hsa +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index 38af6c9f7b..ca19b1db6c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -742,6 +742,9 @@ rocprofiler_set_api_table(const char* name, rocprofiler::hsa::copy_table(hsa_api_table->image_ext_, lib_instance); rocprofiler::hsa::copy_table(hsa_api_table->finalizer_ext_, lib_instance); rocprofiler::hsa::copy_table(hsa_api_table->tools_, lib_instance); +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + rocprofiler::hsa::copy_table(hsa_api_table->pc_sampling_ext_, lib_instance); +#endif // need to construct agent mappings before initializing the queue controller rocprofiler::agent::construct_agent_cache(hsa_api_table); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp index 2b96edfd48..fac3d397a6 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp @@ -39,6 +39,11 @@ TEST(hsa, tables) auto* amd_ext = hsa::get_amd_ext_table(); auto* fini_ext = hsa::get_fini_ext_table(); auto* img_ext = hsa::get_img_ext_table(); + auto* amd_tool = hsa::get_amd_tool_table(); + +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + auto* 
pcs_ext = hsa::get_pc_sampling_ext_table(); +#endif // HsaApiTable instance auto table = hsa::get_table(); @@ -72,6 +77,18 @@ TEST(hsa, tables) EXPECT_EQ(img_ext->version.minor_id, sizeof(hsa::hsa_img_ext_table_t)); EXPECT_EQ(img_ext->version.step_id, HSA_IMAGE_API_TABLE_STEP_VERSION); + // make sure the version matches values from HSA header + EXPECT_EQ(amd_tool->version.major_id, HSA_TOOLS_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(amd_tool->version.minor_id, sizeof(hsa::hsa_amd_tool_table_t)); + EXPECT_EQ(amd_tool->version.step_id, HSA_TOOLS_API_TABLE_STEP_VERSION); + +#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0 + // make sure the version matches values from HSA header + EXPECT_EQ(pcs_ext->version.major_id, HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(pcs_ext->version.minor_id, sizeof(hsa::hsa_pc_sampling_ext_table_t)); + EXPECT_EQ(pcs_ext->version.step_id, HSA_PC_SAMPLING_API_TABLE_STEP_VERSION); +#endif + //------------------------------------------------------------------------// // checks between instances //------------------------------------------------------------------------//