From c0cb907fee79dc194e192bde130e5e4717656bdc Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 13 Sep 2023 13:14:28 -0500 Subject: [PATCH] Support different HSA table sizes (#44) * Support different HSA table sizes - Use hsa-runtime64_VERSION to define pp defs for major and minor version in version.h.in - Update version.h.in to define ROCPROFILER_HSA_RUNTIME_VERSION_{MAJOR,MINOR} - Use HSA_AMD_INTERFACE_VERSION_{MAJOR,MINOR} to handle hsa_amd_vmem_* support - add template specializations for hsa_amd_vmem_* functions - implement HSA version based static asserts * Debug commit - print pp value for ROCPROFILER_HSA_RUNTIME_VERSION and ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION * Debug commit - fix ROCPROFILER_HSA_RUNTIME_VERSION value * Remove debug edits * Update lib/rocprofiler/hsa/utils.hpp - support outputting: - hsa_amd_memory_pool_t - hsa_amd_vmem_alloc_handle_t - hsa_amd_memory_access_desc_t - hsa_amd_memory_pool_t * Update lib/rocprofiler/hsa/utils.hpp - tweak to join_impl * Update lib/rocprofiler/hsa/utils.hpp - use formatting when possible * Update lib/rocprofiler/hsa/types.hpp - Support API_TABLE_MAJOR_VERSIONS > 1 * Update lib/rocprofiler/hsa/types.hpp - remove inherit from undefined template specialization * Update lib/rocprofiler/hsa/utils.hpp - remove duplicate formatter specialization * Update include/rocprofiler/hsa/api_args.h - remove const from non-pointer anonymous structs in union * Use HSA_AMD_EXT_API_TABLE_MAJOR_VERSION --- cmake/rocprofiler_config_interfaces.cmake | 5 + source/include/rocprofiler/hsa/api_args.h | 75 ++++++++++++ source/include/rocprofiler/hsa/api_id.h | 17 +++ source/include/rocprofiler/version.h.in | 8 ++ source/lib/common/defines.hpp | 2 + source/lib/rocprofiler/hsa/hsa.def.cpp | 99 ++++++++++++++++ source/lib/rocprofiler/hsa/types.hpp | 135 ++++++++++++++++++++-- source/lib/rocprofiler/hsa/utils.hpp | 118 +++++++++++++++---- 8 files changed, 425 insertions(+), 34 deletions(-) diff --git 
a/cmake/rocprofiler_config_interfaces.cmake b/cmake/rocprofiler_config_interfaces.cmake index 768d257d98..4b4022cc19 100644 --- a/cmake/rocprofiler_config_interfaces.cmake +++ b/cmake/rocprofiler_config_interfaces.cmake @@ -119,6 +119,11 @@ find_package( ${rocm_version_DIR} ${ROCM_PATH}) +string(REPLACE "." ";" HSA_RUNTIME_VERSION "${hsa-runtime64_VERSION}") +# the following values are encoded into version.h +list(GET HSA_RUNTIME_VERSION 0 HSA_RUNTIME_VERSION_MAJOR) +list(GET HSA_RUNTIME_VERSION 1 HSA_RUNTIME_VERSION_MINOR) + target_link_libraries(rocprofiler-hsa-runtime INTERFACE hsa-runtime64::hsa-runtime64) # ----------------------------------------------------------------------------------------# diff --git a/source/include/rocprofiler/hsa/api_args.h b/source/include/rocprofiler/hsa/api_args.h index 8668e7cfd6..13133ba059 100644 --- a/source/include/rocprofiler/hsa/api_args.h +++ b/source/include/rocprofiler/hsa/api_args.h @@ -24,6 +24,7 @@ #include #include #include +#include typedef union rocprofiler_hsa_api_args_u { @@ -1221,4 +1222,78 @@ typedef union rocprofiler_hsa_api_args_u size_t image_data_slice_pitch; hsa_ext_image_t* image; } hsa_ext_image_create_with_layout; +#if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION >= 0x02 + struct + { + void** ptr; + size_t size; + uint64_t address; + uint64_t flags; + } hsa_amd_vmem_address_reserve; + struct + { + void* ptr; + size_t size; + } hsa_amd_vmem_address_free; + struct + { + hsa_amd_memory_pool_t pool; + size_t size; + hsa_amd_memory_type_t type; + uint64_t flags; + hsa_amd_vmem_alloc_handle_t* memory_handle; + } hsa_amd_vmem_handle_create; + struct + { + hsa_amd_vmem_alloc_handle_t memory_handle; + } hsa_amd_vmem_handle_release; + struct + { + void* va; + size_t size; + size_t in_offset; + hsa_amd_vmem_alloc_handle_t memory_handle; + uint64_t flags; + } hsa_amd_vmem_map; + struct + { + void* va; + size_t size; + } hsa_amd_vmem_unmap; + struct + { + void* va; + size_t size; + const hsa_amd_memory_access_desc_t* desc; + 
size_t desc_cnt; + } hsa_amd_vmem_set_access; + struct + { + void* va; + hsa_access_permission_t* perms; + hsa_agent_t agent_handle; + } hsa_amd_vmem_get_access; + struct + { + int* dmabuf_fd; + hsa_amd_vmem_alloc_handle_t handle; + uint64_t flags; + } hsa_amd_vmem_export_shareable_handle; + struct + { + int dmabuf_fd; + hsa_amd_vmem_alloc_handle_t* handle; + } hsa_amd_vmem_import_shareable_handle; + struct + { + hsa_amd_vmem_alloc_handle_t* handle; + void* addr; + } hsa_amd_vmem_retain_alloc_handle; + struct + { + hsa_amd_vmem_alloc_handle_t alloc_handle; + hsa_amd_memory_pool_t* pool; + hsa_amd_memory_type_t* type; + } hsa_amd_vmem_get_alloc_properties_from_handle; +#endif } rocprofiler_hsa_api_args_t; diff --git a/source/include/rocprofiler/hsa/api_id.h b/source/include/rocprofiler/hsa/api_id.h index c833964fa6..1a2007a431 100644 --- a/source/include/rocprofiler/hsa/api_id.h +++ b/source/include/rocprofiler/hsa/api_id.h @@ -20,6 +20,8 @@ #pragma once +#include + // NOLINTNEXTLINE(performance-enum-size) typedef enum { @@ -223,5 +225,20 @@ typedef enum ROCPROFILER_HSA_API_ID_hsa_ext_image_data_get_info_with_layout, ROCPROFILER_HSA_API_ID_hsa_ext_image_create_with_layout, +#if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION >= 0x02 + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_reserve, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_free, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_create, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_release, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_map, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_unmap, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_set_access, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_access, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_export_shareable_handle, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_import_shareable_handle, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_retain_alloc_handle, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_alloc_properties_from_handle, +#endif + ROCPROFILER_HSA_API_ID_LAST, } rocprofiler_hsa_api_id_t; diff --git 
a/source/include/rocprofiler/version.h.in b/source/include/rocprofiler/version.h.in index 4e24a75f48..76f4cf2683 100644 --- a/source/include/rocprofiler/version.h.in +++ b/source/include/rocprofiler/version.h.in @@ -52,6 +52,9 @@ // compiler information #define ROCPROFILER_COMPILER_ID "@CMAKE_CXX_COMPILER_ID@" #define ROCPROFILER_COMPILER_VERSION "@CMAKE_CXX_COMPILER_VERSION@" + +#define ROCPROFILER_HSA_RUNTIME_VERSION_MAJOR @HSA_RUNTIME_VERSION_MAJOR@ +#define ROCPROFILER_HSA_RUNTIME_VERSION_MINOR @HSA_RUNTIME_VERSION_MINOR@ // clang-format on #define ROCPROFILER_COMPILER_STRING ROCPROFILER_COMPILER_ID " v" ROCPROFILER_COMPILER_VERSION @@ -59,3 +62,8 @@ #define ROCPROFILER_VERSION \ ((10000 * ROCPROFILER_VERSION_MAJOR) + (100 * ROCPROFILER_VERSION_MINOR) + \ ROCPROFILER_VERSION_PATCH) + +// hsa-runtime version found at configure time (from hsa-runtime64_VERSION), encoded as (10000 * major) + (100 * minor) +#define ROCPROFILER_HSA_RUNTIME_VERSION \ + ((10000 * ROCPROFILER_HSA_RUNTIME_VERSION_MAJOR) + \ + (100 * ROCPROFILER_HSA_RUNTIME_VERSION_MINOR)) diff --git a/source/lib/common/defines.hpp b/source/lib/common/defines.hpp index 19f515fc14..7fc7f74f1d 100644 --- a/source/lib/common/defines.hpp +++ b/source/lib/common/defines.hpp @@ -62,3 +62,5 @@ # define ROCPROFILER_FOLD_EXPRESSION(...) ((__VA_ARGS__), ...) 
# endif #endif + +#define ROCPROFILER_COMPUTE_VERSION(MAJOR, MINOR, PATCH) ((10000 * (MAJOR)) + (100 * (MINOR)) + (PATCH)) diff --git a/source/lib/rocprofiler/hsa/hsa.def.cpp b/source/lib/rocprofiler/hsa/hsa.def.cpp index 33ebc2fc26..51ba7ecc8a 100644 --- a/source/lib/rocprofiler/hsa/hsa.def.cpp +++ b/source/lib/rocprofiler/hsa/hsa.def.cpp @@ -222,3 +222,102 @@ HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, ROCPROFILE HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, ROCPROFILER_HSA_API_TABLE_ID_AmdExt, ROCPROFILER_HSA_API_ID_hsa_amd_queue_intercept_register, hsa_amd_queue_intercept_register, hsa_amd_queue_intercept_register_fn, queue, callback, user_data) HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, ROCPROFILER_HSA_API_TABLE_ID_AmdExt, ROCPROFILER_HSA_API_ID_hsa_amd_runtime_queue_create_register, hsa_amd_runtime_queue_create_register, hsa_amd_runtime_queue_create_register_fn, callback, user_data) // clang-format on + +#if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION >= 0x02 +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_reserve, + hsa_amd_vmem_address_reserve, + hsa_amd_vmem_address_reserve_fn, + ptr, + size, + address, + flags) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_free, + hsa_amd_vmem_address_free, + hsa_amd_vmem_address_free_fn, + ptr, + size) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_create, + hsa_amd_vmem_handle_create, + hsa_amd_vmem_handle_create_fn, + pool, + size, + type, + flags, + memory_handle) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_release, + 
hsa_amd_vmem_handle_release, + hsa_amd_vmem_handle_release_fn, + memory_handle) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_map, + hsa_amd_vmem_map, + hsa_amd_vmem_map_fn, + va, + size, + in_offset, + memory_handle, + flags) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_unmap, + hsa_amd_vmem_unmap, + hsa_amd_vmem_unmap_fn, + va, + size) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_set_access, + hsa_amd_vmem_set_access, + hsa_amd_vmem_set_access_fn, + va, + size, + desc, + desc_cnt) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_access, + hsa_amd_vmem_get_access, + hsa_amd_vmem_get_access_fn, + va, + perms, + agent_handle) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_export_shareable_handle, + hsa_amd_vmem_export_shareable_handle, + hsa_amd_vmem_export_shareable_handle_fn, + dmabuf_fd, + handle, + flags) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_import_shareable_handle, + hsa_amd_vmem_import_shareable_handle, + hsa_amd_vmem_import_shareable_handle_fn, + dmabuf_fd, + handle) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_ID_hsa_amd_vmem_retain_alloc_handle, + hsa_amd_vmem_retain_alloc_handle, + hsa_amd_vmem_retain_alloc_handle_fn, + handle, + addr) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + 
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_alloc_properties_from_handle, + hsa_amd_vmem_get_alloc_properties_from_handle, + hsa_amd_vmem_get_alloc_properties_from_handle_fn, + alloc_handle, + pool, + type) +#endif diff --git a/source/lib/rocprofiler/hsa/types.hpp b/source/lib/rocprofiler/hsa/types.hpp index 7196f51880..8b41bd1db7 100644 --- a/source/lib/rocprofiler/hsa/types.hpp +++ b/source/lib/rocprofiler/hsa/types.hpp @@ -20,11 +20,17 @@ #pragma once +#include "lib/common/defines.hpp" #include "rocprofiler/hsa.h" +#include "rocprofiler/version.h" #ifndef ROCPROFILER_UNSAFE_NO_VERSION_CHECK # if defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0 # if HSA_API_TABLE_MAJOR_VERSION <= 0x01 +namespace rocprofiler +{ +namespace hsa +{ static_assert(HSA_CORE_API_TABLE_MAJOR_VERSION == 0x01, "Change in the major version of HSA core API table"); static_assert(HSA_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x01, @@ -33,8 +39,85 @@ static_assert(HSA_FINALIZER_API_TABLE_MAJOR_VERSION == 0x01, "Change in the major version of HSA finalizer API table"); static_assert(HSA_IMAGE_API_TABLE_MAJOR_VERSION == 0x01, "Change in the major version of HSA image API table"); -static_assert(HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION == 0x01, - "Change in the major version of HSA aqlprofile API table"); + +static_assert(HSA_CORE_API_TABLE_STEP_VERSION == 0x00, + "Change in the step version of HSA core API table"); +static_assert(HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x00, + "Change in the step version of HSA amd-extended API table"); +static_assert(HSA_FINALIZER_API_TABLE_STEP_VERSION == 0x00, + "Change in the step version of HSA finalizer API table"); +static_assert(HSA_IMAGE_API_TABLE_STEP_VERSION == 0x00, + "Change in the step version of HSA image API table"); + +// this should always be updated to latest table size +template +struct table_size; + +// latest version of hsa runtime that has been updated for support by rocprofiler +// and the current version of hsa runtime during this compilation 
+constexpr size_t latest_version = ROCPROFILER_COMPUTE_VERSION(1, 11, 0); +constexpr size_t current_version = ROCPROFILER_HSA_RUNTIME_VERSION; + +// aliases to the template specializations providing the table size info +using current_table_size_t = table_size; +using latest_table_size_t = table_size; + +// specialization for v1.9 +template <> +struct table_size +{ + static constexpr size_t finalizer_ext = 64; + static constexpr size_t image_ext = 120; + static constexpr size_t amd_ext = 456; + static constexpr size_t core_api_ext = 1016; +}; + +// specialization for v1.10 - amd_ext grew by 96 bytes (456 -> 552); NOTE(review): that is 12 entries at 8 bytes each, not 10 - confirm +template <> +struct table_size +: table_size +{ + static constexpr size_t amd_ext = 552; +}; + +// version 1.11 is same as 1.10 +template <> +struct table_size +: table_size +{}; + +// default static asserts to check against latest version +// e.g. v1.12 might have the same table sizes as v1.11 so +// we don't want to fail to compile if nothing has changed +template +struct table_size : latest_table_size_t +{}; + +// if you hit these static asserts, that means HSA added entries to the table but did not update the +// step numbers +static_assert(sizeof(FinalizerExtTable) == current_table_size_t::finalizer_ext, + "HSA finalizer API table size changed or version not supported"); +static_assert(sizeof(ImageExtTable) == current_table_size_t::image_ext, + "HSA image-extended API table size changed or version not supported"); +static_assert(sizeof(AmdExtTable) == current_table_size_t::amd_ext, + "HSA amd-extended API table size changed or version not supported"); +static_assert(sizeof(CoreApiTable) == current_table_size_t::core_api_ext, + "HSA core API table size changed or version not supported"); +} // namespace hsa +} // namespace rocprofiler +# else +namespace rocprofiler +{ +namespace hsa +{ +static_assert(HSA_CORE_API_TABLE_MAJOR_VERSION == 0x02, + "Change in the major version of HSA core API table"); +static_assert(HSA_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x02, + 
"Change in the major version of HSA amd-extended API table"); +static_assert(HSA_FINALIZER_API_TABLE_MAJOR_VERSION == 0x02, + "Change in the major version of HSA finalizer API table"); +static_assert(HSA_IMAGE_API_TABLE_MAJOR_VERSION == 0x02, + "Change in the major version of HSA image API table"); static_assert(HSA_CORE_API_TABLE_STEP_VERSION == 0x00, "Change in the major version of HSA core API table"); @@ -47,14 +130,48 @@ static_assert(HSA_IMAGE_API_TABLE_STEP_VERSION == 0x00, static_assert(HSA_AQLPROFILE_API_TABLE_STEP_VERSION == 0x00, "Change in the major version of HSA aqlprofile API table"); +// this should always be updated to latest table size +template +struct table_size; + +// latest version of hsa runtime that has been updated for support by rocprofiler +// and the current version of hsa runtime during this compilation +constexpr size_t latest_version = ROCPROFILER_COMPUTE_VERSION(1, 12, 0); +constexpr size_t current_version = ROCPROFILER_HSA_RUNTIME_VERSION; + +// aliases to the template specializations providing the table size info +using current_table_size_t = table_size; +using latest_table_size_t = table_size; + +// specialization for v1.12 +template <> +struct table_size +{ + static constexpr size_t finalizer_ext = 64; + static constexpr size_t image_ext = 120; + static constexpr size_t amd_ext = 552; + static constexpr size_t core_api_ext = 1016; +}; + +// default static asserts to check against latest version +// e.g. 
v1.12 might have the same table sizes as v1.11 so +// we don't want to fail to compile if nothing has changed +template +struct table_size : latest_table_size_t +{}; + // if you hit these static asserts, that means HSA added entries to the table but did not update the // step numbers -static_assert(sizeof(FinalizerExtTable) == 64, "HSA finalizer API table size changed"); -static_assert(sizeof(ImageExtTable) == 120, "HSA image-extended API table size changed"); -static_assert(sizeof(AmdExtTable) == 552, "HSA amd-extended API table size changed"); -static_assert(sizeof(CoreApiTable) == 1016, "HSA core API table size changed"); -# else -# error "HSA_API_TABLE_MAJOR_VERSION not supported" +static_assert(sizeof(FinalizerExtTable) == current_table_size_t::finalizer_ext, + "HSA finalizer API table size changed or version not supported"); +static_assert(sizeof(ImageExtTable) == current_table_size_t::image_ext, + "HSA image-extended API table size changed or version not supported"); +static_assert(sizeof(AmdExtTable) == current_table_size_t::amd_ext, + "HSA amd-extended API table size changed or version not supported"); +static_assert(sizeof(CoreApiTable) == current_table_size_t::core_api_ext, + "HSA core API table size changed or version not supported"); +} // namespace hsa +} // namespace rocprofiler # endif # endif -#endif \ No newline at end of file +#endif diff --git a/source/lib/rocprofiler/hsa/utils.hpp b/source/lib/rocprofiler/hsa/utils.hpp index 3515f2dbdf..fd3a361bd4 100644 --- a/source/lib/rocprofiler/hsa/utils.hpp +++ b/source/lib/rocprofiler/hsa/utils.hpp @@ -20,20 +20,54 @@ #pragma once -#include -#include +#include + +#include + +#include "fmt/core.h" +#include "fmt/ranges.h" + #include #include #include +#include #include #include +#if !defined(ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION) +# define ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION \ + ((10000 * HSA_AMD_INTERFACE_VERSION_MAJOR) + (100 * HSA_AMD_INTERFACE_VERSION_MINOR)) +#endif + namespace rocprofiler { 
namespace hsa { namespace utils { +template ::value, int> = 0> +std::string +stringize_impl(Tp _v, int) +{ + return fmt::format("{}", _v); +} + +template +std::string +stringize_impl(Tp _v, long) +{ + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); +} + +template +auto +stringize_impl(const std::pair& _v, int) +{ + return std::make_pair(stringize_impl(_v.first, 0), stringize_impl(_v.second, 0)); +} + struct join_args { std::string_view prefix = {}; @@ -42,19 +76,17 @@ struct join_args }; template -auto +std::string join_impl(const Tp& _v) { - return _v; + return stringize_impl(_v, 0); } template -auto +std::string join_impl(const std::pair& _v) { - auto _ss = std::stringstream{}; - _ss << _v.first << "=" << _v.second; - return _ss.str(); + return fmt::format("{}={}", join_impl(_v.first), join_impl(_v.second)); } template @@ -72,28 +104,64 @@ join(join_args ja, Args... args) return (std::stringstream{} << ja.prefix << _content << ja.suffix).str(); } -template -auto -stringize_impl(const Tp& _v) -{ - auto _ss = std::stringstream{}; - _ss << _v; - return _ss.str(); -} - -template -auto -stringize_impl(const std::pair& _v) -{ - return std::make_pair(stringize_impl(_v.first), stringize_impl(_v.second)); -} - template auto stringize(Args... 
args) { - return std::vector>{stringize_impl(args)...}; + return std::vector>{stringize_impl(args, 0)...}; } + +template +struct handle_formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const Tp& v, Ctx& ctx) const + { + return fmt::format_to(ctx.out(), "handle={}", v.handle); + } +}; + +template +struct handle_formatter : handle_formatter +{}; } // namespace utils } // namespace hsa } // namespace rocprofiler + +#if ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION >= 10300 +namespace fmt +{ +template <> +struct formatter +: rocprofiler::hsa::utils::handle_formatter +{}; + +template <> +struct formatter +: rocprofiler::hsa::utils::handle_formatter +{}; + +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const hsa_amd_memory_access_desc_t& v, Ctx& ctx) const + { + return fmt::format_to( + ctx.out(), "permissions={}, agent_handle={}", v.permissions, v.agent_handle); + } +}; +} // namespace fmt +#endif