Support different HSA table sizes (#44)
* Support different HSA table sizes
- Use hsa-runtime64_VERSION to define pp defs for major and minor version in version.h.in
- Update version.h.in to define ROCPROFILER_HSA_RUNTIME_VERSION_{MAJOR,MINOR}
- Use HSA_AMD_INTERFACE_VERSION_{MAJOR,MINOR} to handle hsa_amd_vmem_* support
- add template specializations for hsa_amd_vmem_* functions
- implement HSA version based static asserts
* Debug commit
- print pp value for ROCPROFILER_HSA_RUNTIME_VERSION and ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION
* Debug commit
- fix ROCPROFILER_HSA_RUNTIME_VERSION value
* Remove debug edits
* Update lib/rocprofiler/hsa/utils.hpp
- support outputting:
- hsa_amd_memory_pool_t
- hsa_amd_vmem_alloc_handle_t
- hsa_amd_memory_access_desc_t
- hsa_amd_memory_pool_t
* Update lib/rocprofiler/hsa/utils.hpp
- tweak to join_impl
* Update lib/rocprofiler/hsa/utils.hpp
- use formatting when possible
* Update lib/rocprofiler/hsa/types.hpp
- Support API_TABLE_MAJOR_VERSIONS > 1
* Update lib/rocprofiler/hsa/types.hpp
- remove inherit from undefined template specialization
* Update lib/rocprofiler/hsa/utils.hpp
- remove duplicate formatter specialization
* Update include/rocprofiler/hsa/api_args.h
- remove const from non-pointer anonymous structs in union
* Use HSA_AMD_EXT_API_TABLE_MAJOR_VERSION
Tento commit je obsažen v:
@@ -119,6 +119,11 @@ find_package(
|
||||
${rocm_version_DIR}
|
||||
${ROCM_PATH})
|
||||
|
||||
string(REPLACE "." ";" HSA_RUNTIME_VERSION "${hsa-runtime64_VERSION}")
|
||||
# the following values are encoded into version.h
|
||||
list(GET HSA_RUNTIME_VERSION 0 HSA_RUNTIME_VERSION_MAJOR)
|
||||
list(GET HSA_RUNTIME_VERSION 1 HSA_RUNTIME_VERSION_MINOR)
|
||||
|
||||
target_link_libraries(rocprofiler-hsa-runtime INTERFACE hsa-runtime64::hsa-runtime64)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ext_image.h>
|
||||
#include <rocprofiler/version.h>
|
||||
|
||||
typedef union rocprofiler_hsa_api_args_u
|
||||
{
|
||||
@@ -1221,4 +1222,78 @@ typedef union rocprofiler_hsa_api_args_u
|
||||
size_t image_data_slice_pitch;
|
||||
hsa_ext_image_t* image;
|
||||
} hsa_ext_image_create_with_layout;
|
||||
#if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION >= 0x02
|
||||
struct
|
||||
{
|
||||
void** ptr;
|
||||
size_t size;
|
||||
uint64_t address;
|
||||
uint64_t flags;
|
||||
} hsa_amd_vmem_address_reserve;
|
||||
struct
|
||||
{
|
||||
void* ptr;
|
||||
size_t size;
|
||||
} hsa_amd_vmem_address_free;
|
||||
struct
|
||||
{
|
||||
hsa_amd_memory_pool_t pool;
|
||||
size_t size;
|
||||
hsa_amd_memory_type_t type;
|
||||
uint64_t flags;
|
||||
hsa_amd_vmem_alloc_handle_t* memory_handle;
|
||||
} hsa_amd_vmem_handle_create;
|
||||
struct
|
||||
{
|
||||
hsa_amd_vmem_alloc_handle_t memory_handle;
|
||||
} hsa_amd_vmem_handle_release;
|
||||
struct
|
||||
{
|
||||
void* va;
|
||||
size_t size;
|
||||
size_t in_offset;
|
||||
hsa_amd_vmem_alloc_handle_t memory_handle;
|
||||
uint64_t flags;
|
||||
} hsa_amd_vmem_map;
|
||||
struct
|
||||
{
|
||||
void* va;
|
||||
size_t size;
|
||||
} hsa_amd_vmem_unmap;
|
||||
struct
|
||||
{
|
||||
void* va;
|
||||
size_t size;
|
||||
const hsa_amd_memory_access_desc_t* desc;
|
||||
size_t desc_cnt;
|
||||
} hsa_amd_vmem_set_access;
|
||||
struct
|
||||
{
|
||||
void* va;
|
||||
hsa_access_permission_t* perms;
|
||||
hsa_agent_t agent_handle;
|
||||
} hsa_amd_vmem_get_access;
|
||||
struct
|
||||
{
|
||||
int* dmabuf_fd;
|
||||
hsa_amd_vmem_alloc_handle_t handle;
|
||||
uint64_t flags;
|
||||
} hsa_amd_vmem_export_shareable_handle;
|
||||
struct
|
||||
{
|
||||
int dmabuf_fd;
|
||||
hsa_amd_vmem_alloc_handle_t* handle;
|
||||
} hsa_amd_vmem_import_shareable_handle;
|
||||
struct
|
||||
{
|
||||
hsa_amd_vmem_alloc_handle_t* handle;
|
||||
void* addr;
|
||||
} hsa_amd_vmem_retain_alloc_handle;
|
||||
struct
|
||||
{
|
||||
hsa_amd_vmem_alloc_handle_t alloc_handle;
|
||||
hsa_amd_memory_pool_t* pool;
|
||||
hsa_amd_memory_type_t* type;
|
||||
} hsa_amd_vmem_get_alloc_properties_from_handle;
|
||||
#endif
|
||||
} rocprofiler_hsa_api_args_t;
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler/version.h>
|
||||
|
||||
// NOLINTNEXTLINE(performance-enum-size)
|
||||
typedef enum
|
||||
{
|
||||
@@ -223,5 +225,20 @@ typedef enum
|
||||
ROCPROFILER_HSA_API_ID_hsa_ext_image_data_get_info_with_layout,
|
||||
ROCPROFILER_HSA_API_ID_hsa_ext_image_create_with_layout,
|
||||
|
||||
#if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION >= 0x02
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_reserve,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_free,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_create,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_release,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_map,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_unmap,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_set_access,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_access,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_export_shareable_handle,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_import_shareable_handle,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_retain_alloc_handle,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_alloc_properties_from_handle,
|
||||
#endif
|
||||
|
||||
ROCPROFILER_HSA_API_ID_LAST,
|
||||
} rocprofiler_hsa_api_id_t;
|
||||
|
||||
@@ -52,6 +52,9 @@
|
||||
// compiler information
|
||||
#define ROCPROFILER_COMPILER_ID "@CMAKE_CXX_COMPILER_ID@"
|
||||
#define ROCPROFILER_COMPILER_VERSION "@CMAKE_CXX_COMPILER_VERSION@"
|
||||
|
||||
#define ROCPROFILER_HSA_RUNTIME_VERSION_MAJOR @HSA_RUNTIME_VERSION_MAJOR@
|
||||
#define ROCPROFILER_HSA_RUNTIME_VERSION_MINOR @HSA_RUNTIME_VERSION_MINOR@
|
||||
// clang-format on
|
||||
|
||||
#define ROCPROFILER_COMPILER_STRING ROCPROFILER_COMPILER_ID " v" ROCPROFILER_COMPILER_VERSION
|
||||
@@ -59,3 +62,8 @@
|
||||
#define ROCPROFILER_VERSION \
|
||||
((10000 * ROCPROFILER_VERSION_MAJOR) + (100 * ROCPROFILER_VERSION_MINOR) + \
|
||||
ROCPROFILER_VERSION_PATCH)
|
||||
|
||||
// latest hsa-runtime version supported
|
||||
#define ROCPROFILER_HSA_RUNTIME_VERSION \
|
||||
((10000 * ROCPROFILER_HSA_RUNTIME_VERSION_MAJOR) + \
|
||||
(100 * ROCPROFILER_HSA_RUNTIME_VERSION_MINOR))
|
||||
|
||||
@@ -62,3 +62,5 @@
|
||||
# define ROCPROFILER_FOLD_EXPRESSION(...) ((__VA_ARGS__), ...)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Encodes a MAJOR.MINOR.PATCH triple as one integer: 10000 * MAJOR + 100 * MINOR + PATCH.
// Every argument is parenthesized so that expression arguments (e.g. `base + 1`) are
// evaluated as a unit instead of being split by the precedence of the surrounding `*`.
#define ROCPROFILER_COMPUTE_VERSION(MAJOR, MINOR, PATCH) ((10000 * (MAJOR)) + (100 * (MINOR)) + (PATCH))
|
||||
|
||||
@@ -222,3 +222,102 @@ HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, ROCPROFILE
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, ROCPROFILER_HSA_API_TABLE_ID_AmdExt, ROCPROFILER_HSA_API_ID_hsa_amd_queue_intercept_register, hsa_amd_queue_intercept_register, hsa_amd_queue_intercept_register_fn, queue, callback, user_data)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, ROCPROFILER_HSA_API_TABLE_ID_AmdExt, ROCPROFILER_HSA_API_ID_hsa_amd_runtime_queue_create_register, hsa_amd_runtime_queue_create_register, hsa_amd_runtime_queue_create_register_fn, callback, user_data)
|
||||
// clang-format on
|
||||
|
||||
#if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION >= 0x02
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_reserve,
|
||||
hsa_amd_vmem_address_reserve,
|
||||
hsa_amd_vmem_address_reserve_fn,
|
||||
ptr,
|
||||
size,
|
||||
address,
|
||||
flags)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_address_free,
|
||||
hsa_amd_vmem_address_free,
|
||||
hsa_amd_vmem_address_free_fn,
|
||||
ptr,
|
||||
size)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_create,
|
||||
hsa_amd_vmem_handle_create,
|
||||
hsa_amd_vmem_handle_create_fn,
|
||||
pool,
|
||||
size,
|
||||
type,
|
||||
flags,
|
||||
memory_handle)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_handle_release,
|
||||
hsa_amd_vmem_handle_release,
|
||||
hsa_amd_vmem_handle_release_fn,
|
||||
memory_handle)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_map,
|
||||
hsa_amd_vmem_map,
|
||||
hsa_amd_vmem_map_fn,
|
||||
va,
|
||||
size,
|
||||
in_offset,
|
||||
memory_handle,
|
||||
flags)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_unmap,
|
||||
hsa_amd_vmem_unmap,
|
||||
hsa_amd_vmem_unmap_fn,
|
||||
va,
|
||||
size)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_set_access,
|
||||
hsa_amd_vmem_set_access,
|
||||
hsa_amd_vmem_set_access_fn,
|
||||
va,
|
||||
size,
|
||||
desc,
|
||||
desc_cnt)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_access,
|
||||
hsa_amd_vmem_get_access,
|
||||
hsa_amd_vmem_get_access_fn,
|
||||
va,
|
||||
perms,
|
||||
agent_handle)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_export_shareable_handle,
|
||||
hsa_amd_vmem_export_shareable_handle,
|
||||
hsa_amd_vmem_export_shareable_handle_fn,
|
||||
dmabuf_fd,
|
||||
handle,
|
||||
flags)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_import_shareable_handle,
|
||||
hsa_amd_vmem_import_shareable_handle,
|
||||
hsa_amd_vmem_import_shareable_handle_fn,
|
||||
dmabuf_fd,
|
||||
handle)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_retain_alloc_handle,
|
||||
hsa_amd_vmem_retain_alloc_handle,
|
||||
hsa_amd_vmem_retain_alloc_handle_fn,
|
||||
handle,
|
||||
addr)
|
||||
HSA_API_INFO_DEFINITION_V(ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API,
|
||||
ROCPROFILER_HSA_API_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_API_ID_hsa_amd_vmem_get_alloc_properties_from_handle,
|
||||
hsa_amd_vmem_get_alloc_properties_from_handle,
|
||||
hsa_amd_vmem_get_alloc_properties_from_handle_fn,
|
||||
alloc_handle,
|
||||
pool,
|
||||
type)
|
||||
#endif
|
||||
|
||||
@@ -20,11 +20,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "rocprofiler/hsa.h"
|
||||
#include "rocprofiler/version.h"
|
||||
|
||||
#ifndef ROCPROFILER_UNSAFE_NO_VERSION_CHECK
|
||||
# if defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0
|
||||
# if HSA_API_TABLE_MAJOR_VERSION <= 0x01
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
static_assert(HSA_CORE_API_TABLE_MAJOR_VERSION == 0x01,
|
||||
"Change in the major version of HSA core API table");
|
||||
static_assert(HSA_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x01,
|
||||
@@ -33,8 +39,85 @@ static_assert(HSA_FINALIZER_API_TABLE_MAJOR_VERSION == 0x01,
|
||||
"Change in the major version of HSA finalizer API table");
|
||||
static_assert(HSA_IMAGE_API_TABLE_MAJOR_VERSION == 0x01,
|
||||
"Change in the major version of HSA image API table");
|
||||
static_assert(HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION == 0x01,
|
||||
"Change in the major version of HSA aqlprofile API table");
|
||||
|
||||
static_assert(HSA_CORE_API_TABLE_STEP_VERSION == 0x00,
|
||||
"Change in the major version of HSA core API table");
|
||||
static_assert(HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x00,
|
||||
"Change in the major version of HSA amd-extended API table");
|
||||
static_assert(HSA_FINALIZER_API_TABLE_STEP_VERSION == 0x00,
|
||||
"Change in the major version of HSA finalizer API table");
|
||||
static_assert(HSA_IMAGE_API_TABLE_STEP_VERSION == 0x00,
|
||||
"Change in the major version of HSA image API table");
|
||||
|
||||
// this should always be updated to latest table size
|
||||
template <size_t VersionCode>
|
||||
struct table_size;
|
||||
|
||||
// latest version of hsa runtime that has been updated for support by rocprofiler
|
||||
// and the current version of hsa runtime during this compilation
|
||||
constexpr size_t latest_version = ROCPROFILER_COMPUTE_VERSION(1, 11, 0);
|
||||
constexpr size_t current_version = ROCPROFILER_HSA_RUNTIME_VERSION;
|
||||
|
||||
// aliases to the template specializations providing the table size info
|
||||
using current_table_size_t = table_size<current_version>;
|
||||
using latest_table_size_t = table_size<latest_version>;
|
||||
|
||||
// specialization for v1.9
|
||||
template <>
|
||||
struct table_size<ROCPROFILER_COMPUTE_VERSION(1, 9, 0)>
|
||||
{
|
||||
static constexpr size_t finalizer_ext = 64;
|
||||
static constexpr size_t image_ext = 120;
|
||||
static constexpr size_t amd_ext = 456;
|
||||
static constexpr size_t core_api_ext = 1016;
|
||||
};
|
||||
|
||||
// specialization for v1.10 - increased amd_ext by 12 functions (96 bytes: the hsa_amd_vmem_* entries)
|
||||
template <>
|
||||
struct table_size<ROCPROFILER_COMPUTE_VERSION(1, 10, 0)>
|
||||
: table_size<ROCPROFILER_COMPUTE_VERSION(1, 9, 0)>
|
||||
{
|
||||
static constexpr size_t amd_ext = 552;
|
||||
};
|
||||
|
||||
// version 1.11 is same as 1.10
|
||||
template <>
|
||||
struct table_size<ROCPROFILER_COMPUTE_VERSION(1, 11, 0)>
|
||||
: table_size<ROCPROFILER_COMPUTE_VERSION(1, 10, 0)>
|
||||
{};
|
||||
|
||||
// default static asserts to check against latest version
|
||||
// e.g. v1.12 might have the same table sizes as v1.11 so
|
||||
// we don't want to fail to compile if nothing has changed
|
||||
template <size_t VersionCode>
|
||||
struct table_size : latest_table_size_t
|
||||
{};
|
||||
|
||||
// if you hit these static asserts, that means HSA added entries to the table but did not update the
|
||||
// step numbers
|
||||
static_assert(sizeof(FinalizerExtTable) == current_table_size_t::finalizer_ext,
|
||||
"HSA finalizer API table size changed or version not supported");
|
||||
static_assert(sizeof(ImageExtTable) == current_table_size_t::image_ext,
|
||||
"HSA image-extended API table size changed or version not supported");
|
||||
static_assert(sizeof(AmdExtTable) == current_table_size_t::amd_ext,
|
||||
"HSA amd-extended API table size changed or version not supported");
|
||||
static_assert(sizeof(CoreApiTable) == current_table_size_t::core_api_ext,
|
||||
"HSA core API table size changed or version not supported");
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
# else
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
static_assert(HSA_CORE_API_TABLE_MAJOR_VERSION == 0x02,
|
||||
"Change in the major version of HSA core API table");
|
||||
static_assert(HSA_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x02,
|
||||
"Change in the major version of HSA amd-extended API table");
|
||||
static_assert(HSA_FINALIZER_API_TABLE_MAJOR_VERSION == 0x02,
|
||||
"Change in the major version of HSA finalizer API table");
|
||||
static_assert(HSA_IMAGE_API_TABLE_MAJOR_VERSION == 0x02,
|
||||
"Change in the major version of HSA image API table");
|
||||
|
||||
// Step-version guard for the core table; the message names the step version being checked.
static_assert(HSA_CORE_API_TABLE_STEP_VERSION == 0x00,
              "Change in the step version of HSA core API table");
|
||||
@@ -47,14 +130,48 @@ static_assert(HSA_IMAGE_API_TABLE_STEP_VERSION == 0x00,
|
||||
// Step-version guard for the aqlprofile table; the message names the step version being checked.
static_assert(HSA_AQLPROFILE_API_TABLE_STEP_VERSION == 0x00,
              "Change in the step version of HSA aqlprofile API table");
|
||||
|
||||
// this should always be updated to latest table size
|
||||
template <size_t VersionCode>
|
||||
struct table_size;
|
||||
|
||||
// latest version of hsa runtime that has been updated for support by rocprofiler
|
||||
// and the current version of hsa runtime during this compilation
|
||||
constexpr size_t latest_version = ROCPROFILER_COMPUTE_VERSION(1, 12, 0);
|
||||
constexpr size_t current_version = ROCPROFILER_HSA_RUNTIME_VERSION;
|
||||
|
||||
// aliases to the template specializations providing the table size info
|
||||
using current_table_size_t = table_size<current_version>;
|
||||
using latest_table_size_t = table_size<latest_version>;
|
||||
|
||||
// specialization for v1.12
|
||||
template <>
|
||||
struct table_size<ROCPROFILER_COMPUTE_VERSION(1, 12, 0)>
|
||||
{
|
||||
static constexpr size_t finalizer_ext = 64;
|
||||
static constexpr size_t image_ext = 120;
|
||||
static constexpr size_t amd_ext = 552;
|
||||
static constexpr size_t core_api_ext = 1016;
|
||||
};
|
||||
|
||||
// default static asserts to check against latest version
|
||||
// e.g. v1.13 might have the same table sizes as v1.12 so
|
||||
// we don't want to fail to compile if nothing has changed
|
||||
template <size_t VersionCode>
|
||||
struct table_size : latest_table_size_t
|
||||
{};
|
||||
|
||||
// if you hit these static asserts, that means HSA added entries to the table but did not update the
|
||||
// step numbers
|
||||
static_assert(sizeof(FinalizerExtTable) == 64, "HSA finalizer API table size changed");
|
||||
static_assert(sizeof(ImageExtTable) == 120, "HSA image-extended API table size changed");
|
||||
static_assert(sizeof(AmdExtTable) == 552, "HSA amd-extended API table size changed");
|
||||
static_assert(sizeof(CoreApiTable) == 1016, "HSA core API table size changed");
|
||||
# else
|
||||
# error "HSA_API_TABLE_MAJOR_VERSION not supported"
|
||||
static_assert(sizeof(FinalizerExtTable) == current_table_size_t::finalizer_ext,
|
||||
"HSA finalizer API table size changed or version not supported");
|
||||
static_assert(sizeof(ImageExtTable) == current_table_size_t::image_ext,
|
||||
"HSA image-extended API table size changed or version not supported");
|
||||
static_assert(sizeof(AmdExtTable) == current_table_size_t::amd_ext,
|
||||
"HSA amd-extended API table size changed or version not supported");
|
||||
static_assert(sizeof(CoreApiTable) == current_table_size_t::core_api_ext,
|
||||
"HSA core API table size changed or version not supported");
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -20,20 +20,54 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <rocprofiler/version.h>
|
||||
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include "fmt/core.h"
|
||||
#include "fmt/ranges.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if !defined(ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION)
|
||||
# define ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION \
|
||||
((10000 * HSA_AMD_INTERFACE_VERSION_MAJOR) + (100 * HSA_AMD_INTERFACE_VERSION_MINOR))
|
||||
#endif
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
namespace utils
|
||||
{
|
||||
template <typename Tp, typename Up = Tp, std::enable_if_t<fmt::is_formattable<Tp>::value, int> = 0>
|
||||
std::string
|
||||
stringize_impl(Tp _v, int)
|
||||
{
|
||||
return fmt::format("{}", _v);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
std::string
|
||||
stringize_impl(Tp _v, long)
|
||||
{
|
||||
auto _ss = std::stringstream{};
|
||||
_ss << _v;
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
template <typename LhsT, typename RhsT>
|
||||
auto
|
||||
stringize_impl(const std::pair<LhsT, RhsT>& _v, int)
|
||||
{
|
||||
return std::make_pair(stringize_impl(_v.first, 0), stringize_impl(_v.second, 0));
|
||||
}
|
||||
|
||||
struct join_args
|
||||
{
|
||||
std::string_view prefix = {};
|
||||
@@ -42,19 +76,17 @@ struct join_args
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
auto
|
||||
std::string
|
||||
join_impl(const Tp& _v)
|
||||
{
|
||||
return _v;
|
||||
return stringize_impl(_v, 0);
|
||||
}
|
||||
|
||||
template <typename LhsT, typename RhsT>
|
||||
auto
|
||||
std::string
|
||||
join_impl(const std::pair<LhsT, RhsT>& _v)
|
||||
{
|
||||
auto _ss = std::stringstream{};
|
||||
_ss << _v.first << "=" << _v.second;
|
||||
return _ss.str();
|
||||
return fmt::format("{}={}", join_impl(_v.first), join_impl(_v.second));
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
@@ -72,28 +104,64 @@ join(join_args ja, Args... args)
|
||||
return (std::stringstream{} << ja.prefix << _content << ja.suffix).str();
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
auto
|
||||
stringize_impl(const Tp& _v)
|
||||
{
|
||||
auto _ss = std::stringstream{};
|
||||
_ss << _v;
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
template <typename LhsT, typename RhsT>
|
||||
auto
|
||||
stringize_impl(const std::pair<LhsT, RhsT>& _v)
|
||||
{
|
||||
return std::make_pair(stringize_impl(_v.first), stringize_impl(_v.second));
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
auto
|
||||
stringize(Args... args)
|
||||
{
|
||||
return std::vector<std::pair<std::string, std::string>>{stringize_impl(args)...};
|
||||
return std::vector<std::pair<std::string, std::string>>{stringize_impl(args, 0)...};
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
struct handle_formatter
|
||||
{
|
||||
template <typename ParseContext>
|
||||
constexpr auto parse(ParseContext& ctx)
|
||||
{
|
||||
return ctx.begin();
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
auto format(const Tp& v, Ctx& ctx) const
|
||||
{
|
||||
return fmt::format_to(ctx.out(), "handle={}", v.handle);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
struct handle_formatter<const Tp> : handle_formatter<Tp>
|
||||
{};
|
||||
} // namespace utils
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
#if ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION >= 10300
|
||||
namespace fmt
|
||||
{
|
||||
template <>
|
||||
struct formatter<hsa_amd_memory_pool_t>
|
||||
: rocprofiler::hsa::utils::handle_formatter<hsa_amd_memory_pool_t>
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct formatter<hsa_amd_vmem_alloc_handle_t>
|
||||
: rocprofiler::hsa::utils::handle_formatter<hsa_amd_vmem_alloc_handle_t>
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct formatter<hsa_amd_memory_access_desc_t>
|
||||
{
|
||||
template <typename ParseContext>
|
||||
constexpr auto parse(ParseContext& ctx)
|
||||
{
|
||||
return ctx.begin();
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
auto format(const hsa_amd_memory_access_desc_t& v, Ctx& ctx) const
|
||||
{
|
||||
return fmt::format_to(
|
||||
ctx.out(), "permissions={}, agent_handle={}", v.permissions, v.agent_handle);
|
||||
}
|
||||
};
|
||||
} // namespace fmt
|
||||
#endif
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele