SWDEV-492623: Hip Host Function to Device Symbols Mapping (#18)
* Adding changes to register and read symbols from the hip fat binary
* adding json output for host_functions
* added error handling
* adding json tool support
* Adding tests
* formatting changes
* Adding documentation
* refactoring as per amd-staging
* Adding intializers and changing macros
* Fix page-migration background thread on fork (#31)
* Fix page-migration background thread on fork
After falling off main in the forked child, all the children
try to join on on the parent's monitoring thread. This results
in a deadlock. Parent is waiting for the child to exit, but
the child is trying to join the parent's thread which is
signaled from the parent's static destructors.
Even with just one parent and child, due to copy-on-write
semantics, a child signalling the background thread to join
will still block (thread's updated state is not visible
in the child).
This fix creates background treads on fork per-child with a
pthread_atfork handler, ensuring that each child has its own
monitoring thread.
* Formatting fixes
* Detach page-migration background thread and update test timeout
* Attach files with ctest
* Update corr-id assert
* Tweak on-fork, simplify background thread
* Revert thread detach
* Adding --collection-period feature in rocprofv3 to match v1/v2 parity (#9)
* Adding Trace Period feature to rocprofv3
* Adding feature documentation
* Update source/bin/rocprofv3.py
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* Fixing format
* Moving to Collection Period and changing the input params
* Format Fixes
* Fixing rebasing issues
* Removing atomic include from the tool
* Adding more options for units, optimizing the code
* Fixing rocprofv3.py
* Fixing time conv & adding time controlled app
* Fixing format
* Changing to shared memory testing methodology
* use of shmem use
* Fix include headers for transpose-time-controlled.cpp
* Format upload-image-to-github.py
* Removing shmem and using only env var to dump timestamps from the tool
* Tool Fixes + Test Config
* Adding Tests
* Fixing Review comments
* Update trace period implementation
* Update trace period tests
* check between start and stop timestamps
* Merge Fix
* Update validate.py
* Improve safety of rocprofiler_stop_context after finalization
* Pass context id to collection_period_cntrl by value
* Adding 20 us error margin
* Ensure log level for collection-period test is not more than warning
---------
Co-authored-by: Ammar ELWazir <aelwazir@amd.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
* Update lib/rocprofiler-sdk/code_object/hip/code_object.*
- move error code check macros to implementation
- fix macros which check error code
- use constexpr values instead of #define
* Update lib/rocprofiler-sdk/code_object/hip/code_object.*
- debugging for error that cannot be locally reproduced
* Update lib/rocprofiler-sdk/code_object/hip/code_object.*
- improve error handling and logging
* Update lib/rocprofiler-sdk/code_object/hip/code_object.*
- tweak to non-fatal logging messages
* Update lib/rocprofiler-sdk/code_object/hip/code_object.*
- cleanup of logging messages
* Update host kernel symbol register data fields
* Update source/lib/rocprofiler-sdk/code_object/hip/code_object.hpp
---------
Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
Co-authored-by: Kuricheti, Mythreya <Mythreya.Kuricheti@amd.com>
Co-authored-by: Elwazir, Ammar <Ammar.Elwazir@amd.com>
Co-authored-by: Ammar ELWazir <aelwazir@amd.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: 78d8f4b8ea]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
a0a0a4cffe
Коммит
82261be227
@@ -725,6 +725,97 @@
|
||||
]
|
||||
}
|
||||
},
|
||||
"host_functions": {
|
||||
"type": "array",
|
||||
"description": "Host function records.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Size of the host function record."
|
||||
},
|
||||
"host_function_id": {
|
||||
"type": "integer",
|
||||
"description": "ID of the HIP host function."
|
||||
},
|
||||
"kernel_id": {
|
||||
"type": "integer",
|
||||
"description": "ID of the corressponding kernel."
|
||||
},
|
||||
"code_object_id": {
|
||||
"type": "integer",
|
||||
"description": "ID of the corressponding code object."
|
||||
},
|
||||
"host_function_address": {
|
||||
"type": "integer",
|
||||
"description": "Address of the host function registered."
|
||||
},
|
||||
"modules_address": {
|
||||
"type": "integer",
|
||||
"description": "Address of the hip fat binary registered"
|
||||
},
|
||||
"device_function": {
|
||||
"type": "string",
|
||||
"description": "Device function name registered"
|
||||
},
|
||||
"thread_limit": {
|
||||
"type": "integer",
|
||||
"description": "Size of the thread limit."
|
||||
},
|
||||
"thread_id_address": {
|
||||
"type": "integer",
|
||||
"description": "Address of thread Id."
|
||||
},
|
||||
"block_id_address": {
|
||||
"type": "string",
|
||||
"description": "Address of Block Id."
|
||||
},
|
||||
"block_dim_address": {
|
||||
"type": "string",
|
||||
"description": "Address of Block Dimension"
|
||||
},
|
||||
"grid_dim_address": {
|
||||
"type": "string",
|
||||
"description": "Address of the Grid Dimension."
|
||||
},
|
||||
"workgroup_size_address": {
|
||||
"type": "string",
|
||||
"description": "Address of the work group size."
|
||||
},
|
||||
"formatted_host_function_name": {
|
||||
"type": "string",
|
||||
"description": "Formatted name of the device function."
|
||||
},
|
||||
"demangled_host_function_name": {
|
||||
"type": "string",
|
||||
"description": "Demangled name of device function."
|
||||
},
|
||||
"truncated_host_function_name": {
|
||||
"type": "string",
|
||||
"description": "Truncated name of device function."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"size",
|
||||
"host_function_id",
|
||||
"kernel_id",
|
||||
"code_object_id",
|
||||
"host_function_address",
|
||||
"modules_address",
|
||||
"device_function",
|
||||
"thread_limit",
|
||||
"thread_id_address",
|
||||
"block_id_address",
|
||||
"block_dim_address",
|
||||
"grid_dim_address",
|
||||
"workgroup_size_address",
|
||||
"formatted_host_function_name",
|
||||
"demangled_host_function_name",
|
||||
"truncated_host_function_name"
|
||||
]
|
||||
}
|
||||
},
|
||||
"callback_records": {
|
||||
"type": "object",
|
||||
"description": "Callback record details.",
|
||||
|
||||
@@ -181,6 +181,27 @@ typedef struct
|
||||
uint32_t accum_vgpr_count; ///< Accum vector general purpose register count
|
||||
|
||||
} rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
// rename struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
uint64_t host_function_id; ///< unique host function identifier value
|
||||
uint64_t kernel_id; ///< unique symbol identifier value
|
||||
uint64_t code_object_id; ///< parent unique code object identifier
|
||||
rocprofiler_address_t host_function; ///< kernel host function pointer
|
||||
rocprofiler_address_t modules; ///< reference address where modules will be loaded
|
||||
const char* device_function;
|
||||
uint32_t thread_limit; ///< thread limit
|
||||
rocprofiler_dim3_t thread_ids; ///< thread ids address
|
||||
rocprofiler_dim3_t block_ids; ///< block ids address
|
||||
rocprofiler_dim3_t block_dims; ///< block dimensions address
|
||||
rocprofiler_dim3_t grid_dims; ///< grid dimensions address
|
||||
uint64_t workgroup_size; ///< workgroup size address
|
||||
|
||||
/// @var device_function
|
||||
/// @brief device function name used to map the metadata during kernel launch
|
||||
} rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Kernel Dispatch Callback Tracer Record.
|
||||
|
||||
@@ -139,6 +139,13 @@ save(ArchiveT& ar, rocprofiler_dim3_t data)
|
||||
ROCP_SDK_SAVE_DATA_FIELD(z);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_address_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_FIELD(value);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_load_data_t data)
|
||||
@@ -181,6 +188,25 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_kernel_symbol_regist
|
||||
ROCP_SDK_SAVE_DATA_FIELD(accum_vgpr_count);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_FIELD(size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(host_function_id);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(kernel_id);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(code_object_id);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(host_function);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(modules);
|
||||
ROCP_SDK_SAVE_DATA_CSTR(device_function);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(thread_limit);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(thread_ids);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(block_ids);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(block_dims);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(grid_dims);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(workgroup_size);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_hsa_api_retval_t data)
|
||||
|
||||
@@ -34,21 +34,12 @@ namespace utility
|
||||
{
|
||||
template <typename Tp>
|
||||
auto
|
||||
_as_hex(Tp val, size_t width = 0)
|
||||
as_hex(Tp val, size_t width = 0)
|
||||
{
|
||||
auto ss = std::stringstream{};
|
||||
ss << "0x" << std::hex << std::setw(width) << std::setfill('0') << val;
|
||||
ss << "0x" << std::hex << std::setfill('0') << std::setw(width) << val;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
#define ROCPROFILER_CXX_DEFINE_AS_HEX(TYPE) \
|
||||
inline auto as_hex(TYPE val, size_t width = 0) \
|
||||
{ \
|
||||
return ::rocprofiler::sdk::utility::_as_hex(val, width); \
|
||||
}
|
||||
|
||||
ROCPROFILER_CXX_DEFINE_AS_HEX(uint64_t)
|
||||
#undef ROCPROFILER_CXX_DEFINE_AS_HEX
|
||||
} // namespace utility
|
||||
} // namespace sdk
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -216,7 +216,8 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
{
|
||||
ROCPROFILER_CODE_OBJECT_NONE = 0, ///< Unknown code object operation
|
||||
ROCPROFILER_CODE_OBJECT_LOAD, ///< Code object containing kernel symbols
|
||||
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER, ///< Kernel symbols
|
||||
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER, ///< Kernel symbols - Device
|
||||
ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER, ///< Kernel symbols - Host
|
||||
ROCPROFILER_CODE_OBJECT_LAST,
|
||||
} rocprofiler_code_object_operation_t;
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ set(TOOL_OUTPUT_HEADERS
|
||||
generateStats.hpp
|
||||
generator.hpp
|
||||
kernel_symbol_info.hpp
|
||||
host_symbol_info.hpp
|
||||
metadata.hpp
|
||||
output_config.hpp
|
||||
output_key.hpp
|
||||
|
||||
@@ -164,10 +164,12 @@ write_json(json_output& json_ar,
|
||||
|
||||
{
|
||||
auto kern_sym_data = tool_metadata.get_kernel_symbols();
|
||||
auto host_sym_data = tool_metadata.get_host_symbols();
|
||||
auto code_obj_data = tool_metadata.get_code_objects();
|
||||
|
||||
json_ar(cereal::make_nvp("code_objects", code_obj_data));
|
||||
json_ar(cereal::make_nvp("kernel_symbols", kern_sym_data));
|
||||
json_ar(cereal::make_nvp("host_functions", host_sym_data));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/demangle.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
using rocprofiler_host_kernel_symbol_data_t =
|
||||
rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t;
|
||||
|
||||
struct host_function_info : rocprofiler_host_kernel_symbol_data_t
|
||||
{
|
||||
using base_type = rocprofiler_host_kernel_symbol_data_t;
|
||||
|
||||
template <typename FuncT>
|
||||
host_function_info(const base_type& _base, FuncT&& _formatter)
|
||||
: base_type{_base}
|
||||
, formatted_host_function_name{_formatter(CHECK_NOTNULL(_base.device_function))}
|
||||
, demangled_host_function_name{common::cxx_demangle(CHECK_NOTNULL(_base.device_function))}
|
||||
, truncated_host_function_name{common::truncate_name(demangled_host_function_name)}
|
||||
{}
|
||||
|
||||
host_function_info();
|
||||
~host_function_info() = default;
|
||||
host_function_info(const host_function_info&) = default;
|
||||
host_function_info(host_function_info&&) noexcept = default;
|
||||
host_function_info& operator=(const host_function_info&) = default;
|
||||
host_function_info& operator=(host_function_info&&) noexcept = default;
|
||||
|
||||
std::string formatted_host_function_name = {};
|
||||
std::string demangled_host_function_name = {};
|
||||
std::string truncated_host_function_name = {};
|
||||
};
|
||||
|
||||
using host_function_data_vec_t = std::vector<host_function_info>;
|
||||
using host_function_info_map_t = std::unordered_map<uint64_t, host_function_info>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const ::rocprofiler::tool::host_function_info& data)
|
||||
{
|
||||
cereal::save(
|
||||
ar, static_cast<const ::rocprofiler::tool::rocprofiler_host_kernel_symbol_data_t&>(data));
|
||||
SAVE_DATA_FIELD(formatted_host_function_name);
|
||||
SAVE_DATA_FIELD(demangled_host_function_name);
|
||||
SAVE_DATA_FIELD(truncated_host_function_name);
|
||||
}
|
||||
|
||||
#undef SAVE_DATA_FIELD
|
||||
} // namespace cereal
|
||||
@@ -24,10 +24,13 @@
|
||||
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/output/agent_info.hpp"
|
||||
#include "lib/output/host_symbol_info.hpp"
|
||||
#include "lib/output/kernel_symbol_info.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -67,6 +70,25 @@ kernel_symbol_info::kernel_symbol_info()
|
||||
: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
{}
|
||||
|
||||
constexpr auto null_address_v = rocprofiler_address_t{.value = 0};
|
||||
constexpr auto null_dim3_v = rocprofiler_dim3_t{.x = 0, .y = 0, .z = 0};
|
||||
|
||||
host_function_info::host_function_info()
|
||||
: base_type{0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
null_address_v,
|
||||
null_address_v,
|
||||
"",
|
||||
0,
|
||||
null_dim3_v,
|
||||
null_dim3_v,
|
||||
null_dim3_v,
|
||||
null_dim3_v,
|
||||
0}
|
||||
{}
|
||||
|
||||
metadata::metadata(inprocess)
|
||||
: buffer_names{sdk::get_buffer_tracing_names()}
|
||||
, callback_names{sdk::get_callback_tracing_names()}
|
||||
@@ -196,6 +218,14 @@ metadata::get_kernel_symbol(uint64_t kernel_id) const
|
||||
});
|
||||
}
|
||||
|
||||
const host_function_info*
|
||||
metadata::get_host_function(uint64_t host_function_id) const
|
||||
{
|
||||
return host_functions.rlock([host_function_id](const auto& _data) -> const host_function_info* {
|
||||
return &_data.at(host_function_id);
|
||||
});
|
||||
}
|
||||
|
||||
const tool_counter_info*
|
||||
metadata::get_counter_info(uint64_t instance_id) const
|
||||
{
|
||||
@@ -271,6 +301,18 @@ metadata::get_kernel_symbols() const
|
||||
return _symbol_data;
|
||||
}
|
||||
|
||||
host_function_data_vec_t
|
||||
metadata::get_host_symbols() const
|
||||
{
|
||||
return host_functions.rlock([](const auto& _data_v) {
|
||||
auto _info = std::vector<host_function_info>{};
|
||||
_info.resize(_data_v.size() + 1, host_function_info{});
|
||||
for(const auto& itr : _data_v)
|
||||
_info.at(itr.first) = itr.second;
|
||||
return _info;
|
||||
});
|
||||
}
|
||||
|
||||
metadata::agent_info_ptr_vec_t
|
||||
metadata::get_gpu_agents() const
|
||||
{
|
||||
@@ -362,6 +404,16 @@ metadata::add_kernel_symbol(kernel_symbol_info&& sym)
|
||||
std::move(sym));
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_host_function(host_function_info&& func)
|
||||
{
|
||||
return host_functions.wlock(
|
||||
[](host_function_info_map_t& _data_v, host_function_info&& _func_v) -> bool {
|
||||
return _data_v.emplace(_func_v.host_function_id, std::move(_func_v)).second;
|
||||
},
|
||||
std::move(func));
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_string_entry(size_t key, std::string_view str)
|
||||
{
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#include "agent_info.hpp"
|
||||
#include "counter_info.hpp"
|
||||
#include "host_symbol_info.hpp"
|
||||
#include "kernel_symbol_info.hpp"
|
||||
#include "pc_sample_transform.hpp"
|
||||
|
||||
@@ -100,6 +101,7 @@ struct metadata
|
||||
synced_map<marker_message_map_t> marker_messages = {};
|
||||
synced_map<string_entry_map_t> string_entries = {};
|
||||
synced_map<external_corr_id_set_t> external_corr_ids = {};
|
||||
synced_map<host_function_info_map_t> host_functions = {};
|
||||
|
||||
metadata() = default;
|
||||
metadata(inprocess);
|
||||
@@ -115,12 +117,14 @@ struct metadata
|
||||
const agent_info* get_agent(rocprofiler_agent_id_t _val) const;
|
||||
const code_object_info* get_code_object(uint64_t code_obj_id) const;
|
||||
const kernel_symbol_info* get_kernel_symbol(uint64_t kernel_id) const;
|
||||
const host_function_info* get_host_function(uint64_t host_function_id) const;
|
||||
const tool_counter_info* get_counter_info(uint64_t instance_id) const;
|
||||
const tool_counter_info* get_counter_info(rocprofiler_counter_id_t id) const;
|
||||
const counter_dimension_info_vec_t* get_counter_dimension_info(uint64_t instance_id) const;
|
||||
|
||||
code_object_data_vec_t get_code_objects() const;
|
||||
kernel_symbol_data_vec_t get_kernel_symbols() const;
|
||||
host_function_data_vec_t get_host_symbols() const;
|
||||
agent_info_ptr_vec_t get_gpu_agents() const;
|
||||
counter_info_vec_t get_counter_info() const;
|
||||
counter_dimension_vec_t get_counter_dimension_info() const;
|
||||
@@ -138,6 +142,7 @@ struct metadata
|
||||
bool add_marker_message(uint64_t corr_id, std::string&& msg);
|
||||
bool add_code_object(code_object_info obj);
|
||||
bool add_kernel_symbol(kernel_symbol_info&& sym);
|
||||
bool add_host_function(host_function_info&& func);
|
||||
bool add_string_entry(size_t key, std::string_view str);
|
||||
bool add_external_correlation_id(uint64_t);
|
||||
|
||||
|
||||
@@ -86,7 +86,10 @@ constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2);
|
||||
using marker_message_map_t = std::unordered_map<uint64_t, std::string>;
|
||||
using tool_counter_info = ::rocprofiler::tool::tool_counter_info;
|
||||
using kernel_symbol_info = ::rocprofiler::tool::kernel_symbol_info;
|
||||
using host_function_info = ::rocprofiler::tool::host_function_info;
|
||||
using rocprofiler_kernel_symbol_info_t = ::rocprofiler::tool::rocprofiler_kernel_symbol_info_t;
|
||||
using rocprofiler_host_kernel_symbol_data_t =
|
||||
::rocprofiler::tool::rocprofiler_host_kernel_symbol_data_t;
|
||||
|
||||
enum tracing_marker_kind
|
||||
{
|
||||
|
||||
@@ -649,6 +649,23 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
}
|
||||
}
|
||||
|
||||
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
||||
record.operation == ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER)
|
||||
{
|
||||
auto* hst_data = static_cast<rocprofiler_host_kernel_symbol_data_t*>(record.payload);
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD)
|
||||
{
|
||||
auto success = CHECK_NOTNULL(tool_metadata)
|
||||
->add_host_function(host_function_info{
|
||||
get_dereference(hst_data),
|
||||
[](const char* val) { return tool::format_name(val); }});
|
||||
ROCP_WARNING_IF(!success)
|
||||
<< "duplicate host function found for kernel_id=" << hst_data->kernel_id;
|
||||
|
||||
// TODO : kernel filtering for host functions?!
|
||||
}
|
||||
}
|
||||
|
||||
(void) user_data;
|
||||
(void) data;
|
||||
}
|
||||
|
||||
+176
-3
@@ -21,12 +21,14 @@
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/code_object/code_object.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/code_object/hip/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/code_object/hsa/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/code_object/hsa/kernel_symbol.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
@@ -41,10 +43,13 @@
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <regex>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -72,6 +77,7 @@ struct code_object_info;
|
||||
SPECIALIZE_CODE_OBJECT_INFO(NONE)
|
||||
SPECIALIZE_CODE_OBJECT_INFO(LOAD)
|
||||
SPECIALIZE_CODE_OBJECT_INFO(DEVICE_KERNEL_SYMBOL_REGISTER)
|
||||
SPECIALIZE_CODE_OBJECT_INFO(HOST_KERNEL_SYMBOL_REGISTER)
|
||||
|
||||
#undef SPECIALIZE_CODE_OBJECT_INFO
|
||||
|
||||
@@ -356,6 +362,13 @@ get_kernel_symbol_id()
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_host_function_id()
|
||||
{
|
||||
static auto _v = std::atomic<uint64_t>{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
using kernel_object_map_t = std::unordered_map<uint64_t, uint64_t>;
|
||||
using executable_array_t = std::vector<hsa_executable_t>;
|
||||
using code_object_unload_array_t = std::vector<hsa::code_object_unload>;
|
||||
@@ -388,6 +401,14 @@ get_kernel_object_map()
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto*
|
||||
get_hip_register_data()
|
||||
{
|
||||
static auto*& _v =
|
||||
common::static_object<common::Synchronized<hip::hip_register_data>>::construct();
|
||||
return _v;
|
||||
}
|
||||
|
||||
hsa_status_t
|
||||
executable_iterate_agent_symbols_load_callback(hsa_executable_t executable,
|
||||
hsa_agent_t agent,
|
||||
@@ -723,14 +744,62 @@ get_destroy_function()
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_hip_register_fatbinary_function()
|
||||
{
|
||||
static decltype(::std::declval<HipCompilerDispatchTable>().__hipRegisterFatBinary_fn) _v =
|
||||
nullptr;
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_hip_register_function_function()
|
||||
{
|
||||
static decltype(::std::declval<HipCompilerDispatchTable>().__hipRegisterFunction_fn) _v =
|
||||
nullptr;
|
||||
return _v;
|
||||
}
|
||||
|
||||
bool
|
||||
initialize_hip_binary_data()
|
||||
{
|
||||
static bool is_initialized =
|
||||
CHECK_NOTNULL(get_hip_register_data())->wlock([](hip::hip_register_data& data) {
|
||||
ROCP_WARNING_IF(!data.fat_binary) << "No binary registered for HIP";
|
||||
if(!data.fat_binary) return false;
|
||||
std::vector<const rocprofiler_agent_t*> rocp_agents = rocprofiler::agent::get_agents();
|
||||
for(const auto* rocp_agent : rocp_agents)
|
||||
{
|
||||
if(rocp_agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
auto hsa_agent = agent::get_hsa_agent(rocp_agent);
|
||||
if(!hsa_agent.has_value()) continue;
|
||||
for(auto& isa : hip::get_isa_offsets(hsa_agent.value(), data.fat_binary))
|
||||
{
|
||||
auto kernel_symbols_name_map =
|
||||
hip::get_kernel_symbol_device_name_map(isa, data.fat_binary);
|
||||
// many to one mapping as the same kernel symbols can be found in multiple code
|
||||
// objects
|
||||
if(!kernel_symbols_name_map.empty())
|
||||
data.kernel_symbol_device_map.insert(kernel_symbols_name_map.begin(),
|
||||
kernel_symbols_name_map.end());
|
||||
}
|
||||
}
|
||||
return true;
|
||||
});
|
||||
return is_initialized;
|
||||
}
|
||||
|
||||
hsa_status_t
|
||||
executable_freeze(hsa_executable_t executable, const char* options)
|
||||
{
|
||||
hsa_status_t status = CHECK_NOTNULL(get_freeze_function())(executable, options);
|
||||
if(status != HSA_STATUS_SUCCESS) return status;
|
||||
|
||||
ROCP_INFO << "running " << __FUNCTION__ << " (executable=" << executable.handle << ")...";
|
||||
// before iterating code-object populate the host function map from registered binary
|
||||
bool is_initialized = initialize_hip_binary_data();
|
||||
ROCP_ERROR_IF(!is_initialized) << "hip mapping data not initialized";
|
||||
|
||||
ROCP_INFO << "running " << __FUNCTION__ << " (executable=" << executable.handle << ")...";
|
||||
CHECK_NOTNULL(get_executables())->wlock([executable](executable_array_t& data) {
|
||||
data.emplace_back(executable);
|
||||
});
|
||||
@@ -745,6 +814,7 @@ executable_freeze(hsa_executable_t executable, const char* options)
|
||||
constexpr auto CODE_OBJECT_LOAD = ROCPROFILER_CODE_OBJECT_LOAD;
|
||||
constexpr auto CODE_OBJECT_KERNEL_SYMBOL =
|
||||
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER;
|
||||
constexpr auto CODE_OBJECT_HOST_SYMBOL = ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER;
|
||||
|
||||
auto&& context_filter = [](const context_t* ctx) {
|
||||
return (ctx->callback_tracer && ctx->callback_tracer->domains(CODE_OBJECT_KIND) &&
|
||||
@@ -811,6 +881,40 @@ executable_freeze(hsa_executable_t executable, const char* options)
|
||||
citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
|
||||
auto& user_data = sitr->user_data[citr];
|
||||
cb_data.callback(record, &user_data, cb_data.data);
|
||||
|
||||
std::string device_name =
|
||||
CHECK_NOTNULL(get_hip_register_data())
|
||||
->rlock([sym_data](
|
||||
const hip::hip_register_data& register_data) {
|
||||
const auto& sym_map =
|
||||
register_data.kernel_symbol_device_map;
|
||||
const auto it = sym_map.find(*CHECK_NOTNULL(
|
||||
common::get_string_entry(sym_data.kernel_name)));
|
||||
if(it != sym_map.end()) return it->second;
|
||||
return std::string();
|
||||
});
|
||||
// Does not have a host function, skip
|
||||
if(device_name.empty()) continue;
|
||||
auto host_data =
|
||||
CHECK_NOTNULL(get_hip_register_data())
|
||||
->rlock([device_name](
|
||||
const hip::hip_register_data& register_data) {
|
||||
return register_data.host_function_map.at(device_name);
|
||||
});
|
||||
host_data.code_object_id = sym_data.code_object_id;
|
||||
host_data.kernel_id = sym_data.kernel_id;
|
||||
host_data.host_function_id = ++get_host_function_id();
|
||||
auto hip_record = rocprofiler_callback_tracing_record_t{
|
||||
.context_id = rocprofiler_context_id_t{citr->context_idx},
|
||||
.thread_id = tidx,
|
||||
.correlation_id = rocprofiler_correlation_id_t{},
|
||||
.kind = CODE_OBJECT_KIND,
|
||||
.operation = CODE_OBJECT_HOST_SYMBOL,
|
||||
.phase = ROCPROFILER_CALLBACK_PHASE_LOAD,
|
||||
.payload = static_cast<void*>(&host_data)};
|
||||
|
||||
// invoke callback
|
||||
cb_data.callback(hip_record, &user_data, cb_data.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -877,6 +981,63 @@ executable_destroy(hsa_executable_t executable)
|
||||
return CHECK_NOTNULL(get_destroy_function())(executable);
|
||||
}
|
||||
|
||||
void**
|
||||
hip_register_fat_binary(const void* data)
|
||||
{
|
||||
const hip::hip_fat_binary_wrapper* fbwrapper =
|
||||
reinterpret_cast<const hip::hip_fat_binary_wrapper*>(data);
|
||||
ROCP_ERROR_IF((fbwrapper->magic != hip::HIP_FAT_MAGIC || fbwrapper->version != 1))
|
||||
<< "register fat binary failed";
|
||||
CHECK_NOTNULL(get_hip_register_data())->wlock([fbwrapper](hip::hip_register_data& reg_data) {
|
||||
reg_data.fat_binary = fbwrapper->binary;
|
||||
});
|
||||
return CHECK_NOTNULL(get_hip_register_fatbinary_function())(data);
|
||||
}
|
||||
|
||||
void
|
||||
hip_register_function(void** modules,
|
||||
const void* host_function,
|
||||
char* device_function,
|
||||
const char* device_name,
|
||||
unsigned int thread_limit,
|
||||
uint3* thread_id,
|
||||
uint3* block_id,
|
||||
dim3* block_dim,
|
||||
dim3* grid_dim,
|
||||
int* workgroup_size)
|
||||
{
|
||||
auto convert_to_dim3 = [](auto* val) {
|
||||
return (val) ? rocprofiler_dim3_t{.x = val->x, .y = val->y, .z = val->z}
|
||||
: rocprofiler_dim3_t{0, 0, 0};
|
||||
};
|
||||
|
||||
CHECK_NOTNULL(get_hip_register_data())->wlock([&](hip::hip_register_data& data) {
|
||||
const std::string* d_func = common::get_string_entry(device_function);
|
||||
auto host_symbol = common::init_public_api_struct(hip::host_symbol_data_t{});
|
||||
host_symbol.host_function.ptr = const_cast<void*>(host_function);
|
||||
host_symbol.modules.ptr = modules;
|
||||
host_symbol.device_function = d_func->c_str();
|
||||
host_symbol.thread_limit = thread_limit;
|
||||
host_symbol.thread_ids = convert_to_dim3(thread_id);
|
||||
host_symbol.block_ids = convert_to_dim3(block_id);
|
||||
host_symbol.block_dims = convert_to_dim3(block_dim);
|
||||
host_symbol.grid_dims = convert_to_dim3(grid_dim);
|
||||
host_symbol.workgroup_size = (workgroup_size) ? *workgroup_size : 0;
|
||||
data.host_function_map.emplace(*CHECK_NOTNULL(d_func), host_symbol);
|
||||
});
|
||||
CHECK_NOTNULL(get_hip_register_function_function())
|
||||
(modules,
|
||||
host_function,
|
||||
device_function,
|
||||
device_name,
|
||||
thread_limit,
|
||||
thread_id,
|
||||
block_id,
|
||||
block_dim,
|
||||
grid_dim,
|
||||
workgroup_size);
|
||||
}
|
||||
|
||||
std::vector<hsa::code_object_unload>
|
||||
shutdown(hsa_executable_t executable)
|
||||
{
|
||||
@@ -980,6 +1141,19 @@ initialize(HsaApiTable* table)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
initialize(HipCompilerDispatchTable* table)
|
||||
{
|
||||
get_hip_register_fatbinary_function() = CHECK_NOTNULL(table->__hipRegisterFatBinary_fn);
|
||||
get_hip_register_function_function() = CHECK_NOTNULL(table->__hipRegisterFunction_fn);
|
||||
table->__hipRegisterFatBinary_fn = hip_register_fat_binary;
|
||||
table->__hipRegisterFunction_fn = hip_register_function;
|
||||
ROCP_FATAL_IF(get_hip_register_fatbinary_function() == table->__hipRegisterFatBinary_fn)
|
||||
<< "infinite recursion";
|
||||
ROCP_FATAL_IF(get_hip_register_function_function() == table->__hipRegisterFunction_fn)
|
||||
<< "infinite recursion";
|
||||
}
|
||||
|
||||
uint64_t
|
||||
get_kernel_id(uint64_t kernel_object)
|
||||
{
|
||||
@@ -1013,7 +1187,6 @@ void
|
||||
iterate_loaded_code_objects(code_object_iterator_t&& func)
|
||||
{
|
||||
if(is_shutdown || !get_executables() || !get_code_objects()) return;
|
||||
|
||||
CHECK_NOTNULL(get_code_objects())
|
||||
->rlock(
|
||||
[](const code_object_array_t& data, code_object_iterator_t&& func_v) {
|
||||
|
||||
@@ -59,6 +59,9 @@ iterate_loaded_code_objects(code_object_iterator_t&& func);
|
||||
void
|
||||
initialize(HsaApiTable* table);
|
||||
|
||||
void
|
||||
initialize(HipCompilerDispatchTable* table);
|
||||
|
||||
void
|
||||
finalize();
|
||||
} // namespace code_object
|
||||
|
||||
+2
-2
@@ -1,8 +1,8 @@
|
||||
#
|
||||
# code object data structures from HIP
|
||||
#
|
||||
set(ROCPROFILER_LIB_CODE_OBJECT_HIP_SOURCES)
|
||||
set(ROCPROFILER_LIB_CODE_OBJECT_HIP_HEADERS)
|
||||
set(ROCPROFILER_LIB_CODE_OBJECT_HIP_SOURCES code_object.cpp)
|
||||
set(ROCPROFILER_LIB_CODE_OBJECT_HIP_HEADERS code_object.hpp)
|
||||
|
||||
target_sources(
|
||||
rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_CODE_OBJECT_HIP_SOURCES}
|
||||
|
||||
+252
@@ -0,0 +1,252 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/code_object/hip/code_object.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
|
||||
#include <amd_comgr/amd_comgr.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace code_object
|
||||
{
|
||||
namespace hip
|
||||
{
|
||||
constexpr auto kernels_metadata_lookup = "amdhsa.kernels";
|
||||
constexpr auto kernel_name_metadata_lookup = ".name";
|
||||
constexpr auto kernel_symbol_metadata_lookup = ".symbol";
|
||||
|
||||
#define CHECK_RETURN_HSA(call) \
|
||||
{ \
|
||||
if(hsa_status_t status = (call); status != HSA_STATUS_SUCCESS) \
|
||||
{ \
|
||||
const char* reason = "<unknown-error-reason>"; \
|
||||
if(rocprofiler::hsa::get_core_table()) \
|
||||
rocprofiler::hsa::get_core_table()->hsa_status_string_fn(status, &reason); \
|
||||
ROCP_INFO << #call << " returned error code " << status << " :: " << reason; \
|
||||
return status; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_FATAL_COMGR(call) \
|
||||
if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \
|
||||
{ \
|
||||
const char* reason = ""; \
|
||||
amd_comgr_status_string(status, &reason); \
|
||||
ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason; \
|
||||
}
|
||||
|
||||
#define CHECK_FATAL_COMGR_EXT(call, ...) \
|
||||
if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \
|
||||
{ \
|
||||
const char* reason = ""; \
|
||||
amd_comgr_status_string(status, &reason); \
|
||||
ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason \
|
||||
<< " :: " << __VA_ARGS__; \
|
||||
}
|
||||
|
||||
#define CHECK_RETURN_COMGR(call) \
|
||||
if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \
|
||||
{ \
|
||||
const char* reason = ""; \
|
||||
amd_comgr_status_string(status, &reason); \
|
||||
ROCP_INFO << #call << " returned error code " << status << " :: " << reason; \
|
||||
return AMD_COMGR_STATUS_ERROR; \
|
||||
}
|
||||
|
||||
#define CHECK_RETURN_COMGR_EXT(call, ...) \
|
||||
if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \
|
||||
{ \
|
||||
const char* reason = ""; \
|
||||
amd_comgr_status_string(status, &reason); \
|
||||
ROCP_INFO << #call << " returned error code " << status << " :: " << reason \
|
||||
<< " :: " << __VA_ARGS__; \
|
||||
return AMD_COMGR_STATUS_ERROR; \
|
||||
}
|
||||
|
||||
hsa_status_t
|
||||
get_isa_info(hsa_isa_t isa, void* data)
|
||||
{
|
||||
size_t name_len = 0;
|
||||
CHECK_RETURN_HSA(rocprofiler::hsa::get_core_table()->hsa_isa_get_info_alt_fn(
|
||||
isa, HSA_ISA_INFO_NAME_LENGTH, &name_len));
|
||||
|
||||
ROCP_INFO << "isa name length: " << name_len;
|
||||
|
||||
if(name_len > 0)
|
||||
{
|
||||
auto name = std::string(name_len, '\0');
|
||||
CHECK_RETURN_HSA(rocprofiler::hsa::get_core_table()->hsa_isa_get_info_alt_fn(
|
||||
isa, HSA_ISA_INFO_NAME, name.data()));
|
||||
name = name.substr(0, name.find_first_of('\0'));
|
||||
|
||||
ROCP_INFO << "found isa: " << name;
|
||||
|
||||
auto* info = static_cast<isa_names_t*>(data);
|
||||
CHECK_NOTNULL(info)->emplace_back(common::get_string_entry(name));
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
comgr_code_object_vec_t
|
||||
get_isa_offsets(hsa_agent_t hsa_agent, const void* fat_bin)
|
||||
{
|
||||
auto isas = isa_names_t{};
|
||||
auto hsa_status = rocprofiler::hsa::get_core_table()->hsa_agent_iterate_isas_fn(
|
||||
hsa_agent, get_isa_info, &isas);
|
||||
|
||||
if(isas.empty())
|
||||
{
|
||||
ROCP_INFO << "failed to get ISAs for agent-"
|
||||
<< CHECK_NOTNULL(agent::get_rocprofiler_agent(hsa_agent))->node_id
|
||||
<< " :: " << rocprofiler::hsa::get_hsa_status_string(hsa_status);
|
||||
return comgr_code_object_vec_t{};
|
||||
}
|
||||
|
||||
auto query_list = comgr_code_object_vec_t{};
|
||||
for(auto& isa : isas)
|
||||
query_list.emplace_back(amd_comgr_code_object_info_t{isa->c_str(), 0, 0});
|
||||
|
||||
auto data_object = amd_comgr_data_t{0};
|
||||
CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object));
|
||||
CHECK_FATAL_COMGR(
|
||||
amd_comgr_set_data(data_object, 4096, reinterpret_cast<const char*>(fat_bin)));
|
||||
CHECK_FATAL_COMGR(
|
||||
amd_comgr_lookup_code_object(data_object, query_list.data(), query_list.size()));
|
||||
CHECK_FATAL_COMGR(amd_comgr_release_data(data_object));
|
||||
|
||||
return query_list;
|
||||
}
|
||||
|
||||
amd_comgr_status_t
|
||||
get_node_string(const amd_comgr_metadata_node_t& node, std::string* value)
|
||||
{
|
||||
size_t size = 0;
|
||||
CHECK_RETURN_COMGR(amd_comgr_get_metadata_string(node, &size, nullptr));
|
||||
CHECK_NOTNULL(value)->resize(size, '\0');
|
||||
CHECK_RETURN_COMGR(amd_comgr_get_metadata_string(node, &size, value->data()));
|
||||
*value = value->substr(0, value->find_first_of('\0'));
|
||||
ROCP_INFO << "found node string: " << *value;
|
||||
return AMD_COMGR_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amd_comgr_status_t
|
||||
get_device_name_kernel_symbols_mapping(const amd_comgr_metadata_node_t key,
|
||||
const amd_comgr_metadata_node_t value,
|
||||
void* data)
|
||||
{
|
||||
std::string key_str{};
|
||||
CHECK_RETURN_COMGR(get_node_string(key, &key_str));
|
||||
if(key_str != kernel_symbol_metadata_lookup) return AMD_COMGR_STATUS_SUCCESS;
|
||||
|
||||
// More meta data information can be extracted from binary image here
|
||||
std::string* kernel_symbol = static_cast<std::string*>(data);
|
||||
CHECK_RETURN_COMGR(get_node_string(value, kernel_symbol));
|
||||
return AMD_COMGR_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amd_comgr_status_t
|
||||
get_kernels_meta_node(const amd_comgr_code_object_info_t& isa_offset,
|
||||
const void* fat_bin,
|
||||
amd_comgr_metadata_node_t* kernels_metadata)
|
||||
{
|
||||
auto binary_data = amd_comgr_data_t{0};
|
||||
CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binary_data));
|
||||
|
||||
void* bin_offset = static_cast<char*>(const_cast<void*>(fat_bin)) + isa_offset.offset;
|
||||
CHECK_RETURN_COMGR_EXT(
|
||||
amd_comgr_set_data(binary_data, isa_offset.size, static_cast<const char*>(bin_offset)),
|
||||
"binary_data=" << binary_data.handle << ", isa=(" << isa_offset.isa << ", "
|
||||
<< isa_offset.size << ", " << isa_offset.offset << "), fat_bin=" << fat_bin);
|
||||
|
||||
auto binary_metadata = amd_comgr_metadata_node_t{};
|
||||
CHECK_FATAL_COMGR(amd_comgr_get_data_metadata(binary_data, &binary_metadata));
|
||||
CHECK_FATAL_COMGR(
|
||||
amd_comgr_metadata_lookup(binary_metadata, kernels_metadata_lookup, kernels_metadata));
|
||||
|
||||
return AMD_COMGR_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
kernel_symbol_hip_device_map_t
|
||||
get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset,
|
||||
const void* fat_bin)
|
||||
{
|
||||
auto kernel_sym_device_func_map = kernel_symbol_hip_device_map_t{};
|
||||
auto kernels_metadata = amd_comgr_metadata_node_t{0};
|
||||
size_t num_kernels{0};
|
||||
|
||||
if(get_kernels_meta_node(isa_offset, CHECK_NOTNULL(fat_bin), &kernels_metadata) !=
|
||||
AMD_COMGR_STATUS_SUCCESS)
|
||||
return kernel_sym_device_func_map;
|
||||
|
||||
CHECK_FATAL_COMGR(amd_comgr_get_metadata_list_size(kernels_metadata, &num_kernels));
|
||||
for(size_t i = 0; i < num_kernels; i++)
|
||||
{
|
||||
auto kernel_node = amd_comgr_metadata_node_t{};
|
||||
auto kernel_name_meta = amd_comgr_metadata_node_t{};
|
||||
|
||||
CHECK_FATAL_COMGR(amd_comgr_index_list_metadata(kernels_metadata, i, &kernel_node));
|
||||
CHECK_FATAL_COMGR(
|
||||
amd_comgr_metadata_lookup(kernel_node, kernel_name_metadata_lookup, &kernel_name_meta));
|
||||
|
||||
auto kernel_meta_name = std::string{};
|
||||
if(get_node_string(kernel_name_meta, &kernel_meta_name) != AMD_COMGR_STATUS_SUCCESS ||
|
||||
kernel_meta_name.empty())
|
||||
continue;
|
||||
|
||||
ROCP_INFO << "found kernel meta name: " << kernel_meta_name;
|
||||
|
||||
auto kernel_symbol = std::string{};
|
||||
CHECK_FATAL_COMGR(amd_comgr_iterate_map_metadata(
|
||||
kernel_node, get_device_name_kernel_symbols_mapping, &kernel_symbol));
|
||||
if(!kernel_symbol.empty())
|
||||
{
|
||||
ROCP_INFO << "found kernel symbol mapping: " << kernel_symbol << " -> "
|
||||
<< kernel_meta_name;
|
||||
kernel_sym_device_func_map.emplace(kernel_symbol, kernel_meta_name);
|
||||
}
|
||||
}
|
||||
return kernel_sym_device_func_map;
|
||||
}
|
||||
} // namespace hip
|
||||
} // namespace code_object
|
||||
} // namespace rocprofiler
|
||||
+89
@@ -0,0 +1,89 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/code_object/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/code_object/hip/code_object.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
|
||||
#include <amd_comgr/amd_comgr.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace code_object
|
||||
{
|
||||
namespace hip
|
||||
{
|
||||
using host_symbol_data_t =
|
||||
rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t;
|
||||
using hip_host_function_map_t = std::unordered_map<std::string, host_symbol_data_t>;
|
||||
using isa_names_t = std::vector<const std::string*>;
|
||||
using kernel_symbol_hip_device_map_t = std::unordered_map<std::string, std::string>;
|
||||
using comgr_code_object_vec_t = std::vector<amd_comgr_code_object_info_t>;
|
||||
|
||||
constexpr unsigned HIP_FAT_MAGIC = 0x48495046; // HIPF
|
||||
|
||||
struct hip_register_data
|
||||
{
|
||||
const void* fat_binary = nullptr;
|
||||
hip_host_function_map_t host_function_map = {};
|
||||
kernel_symbol_hip_device_map_t kernel_symbol_device_map = {};
|
||||
};
|
||||
|
||||
struct hip_fat_binary_wrapper
|
||||
{
|
||||
unsigned int magic = 0;
|
||||
unsigned int version = 0;
|
||||
void* binary = nullptr;
|
||||
void* dummy1 = nullptr;
|
||||
};
|
||||
|
||||
comgr_code_object_vec_t
|
||||
get_isa_offsets(hsa_agent_t hsa_agent, const void* fat_bin);
|
||||
|
||||
kernel_symbol_hip_device_map_t
|
||||
get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset,
|
||||
const void* fat_bin);
|
||||
|
||||
} // namespace hip
|
||||
} // namespace code_object
|
||||
} // namespace rocprofiler
|
||||
@@ -763,6 +763,8 @@ rocprofiler_set_api_table(const char* name,
|
||||
// the HIP API tracing invokes the function pointers from the copy below
|
||||
rocprofiler::hip::copy_table(hip_compiler_api_table, lib_instance);
|
||||
|
||||
rocprofiler::code_object::initialize(hip_compiler_api_table);
|
||||
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::hip::update_table(hip_compiler_api_table);
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ def test_data_structure(input_data):
|
||||
node_exists("names", sdk_data["callback_records"])
|
||||
node_exists("code_objects", sdk_data["callback_records"])
|
||||
node_exists("kernel_symbols", sdk_data["callback_records"])
|
||||
node_exists("host_functions", sdk_data["callback_records"])
|
||||
node_exists("hsa_api_traces", sdk_data["callback_records"])
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["callback_records"])
|
||||
|
||||
@@ -34,6 +34,7 @@ def test_data_structure(input_data):
|
||||
node_exists("names", sdk_data["callback_records"])
|
||||
node_exists("code_objects", sdk_data["callback_records"])
|
||||
node_exists("kernel_symbols", sdk_data["callback_records"])
|
||||
node_exists("host_functions", sdk_data["callback_records"])
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"])
|
||||
node_exists("kernel_dispatch", sdk_data["callback_records"])
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ def test_data_structure(input_data):
|
||||
node_exists("names", sdk_data["callback_records"])
|
||||
node_exists("code_objects", sdk_data["callback_records"])
|
||||
node_exists("kernel_symbols", sdk_data["callback_records"])
|
||||
node_exists("host_functions", sdk_data["callback_records"])
|
||||
node_exists("hsa_api_traces", sdk_data["callback_records"])
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["callback_records"])
|
||||
|
||||
@@ -82,6 +82,7 @@ def test_data_structure(input_data):
|
||||
node_exists("names", sdk_data["callback_records"])
|
||||
node_exists("code_objects", sdk_data["callback_records"])
|
||||
node_exists("kernel_symbols", sdk_data["callback_records"])
|
||||
node_exists("host_functions", sdk_data["callback_records"])
|
||||
node_exists("hsa_api_traces", sdk_data["callback_records"])
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["callback_records"], 0)
|
||||
|
||||
@@ -121,6 +121,34 @@ def test_kernel_trace(kernel_input_data):
|
||||
assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"])
|
||||
|
||||
|
||||
def test_host_functions_json(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
|
||||
def get_kernel_name(kernel_id):
|
||||
return data["kernel_symbols"][kernel_id]["truncated_kernel_name"]
|
||||
|
||||
def get_host_function_name(host_function_id):
|
||||
return data["host_functions"][host_function_id]["truncated_host_function_name"]
|
||||
|
||||
host_function_data = data.host_functions
|
||||
kernel_symbols_data = data.kernel_symbols
|
||||
code_objects_data = data.code_objects
|
||||
assert len(host_function_data) > 0
|
||||
for host_function in host_function_data:
|
||||
if host_function.host_function_id == 0:
|
||||
continue
|
||||
assert host_function.host_function_id > 0
|
||||
assert host_function.kernel_id > 0 and host_function.kernel_id <= len(
|
||||
kernel_symbols_data
|
||||
)
|
||||
assert host_function.code_object_id > 0 and host_function.code_object_id <= len(
|
||||
code_objects_data
|
||||
)
|
||||
assert get_host_function_name(
|
||||
host_function["host_function_id"]
|
||||
) == get_kernel_name(host_function.kernel_id)
|
||||
|
||||
|
||||
def test_kernel_trace_json(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ def test_data_structure(input_data):
|
||||
node_exists("names", sdk_data["callback_records"])
|
||||
node_exists("code_objects", sdk_data["callback_records"])
|
||||
node_exists("kernel_symbols", sdk_data["callback_records"])
|
||||
node_exists("host_functions", sdk_data["callback_records"])
|
||||
node_exists("hsa_api_traces", sdk_data["callback_records"])
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
|
||||
node_exists("scratch_memory_traces", sdk_data["callback_records"], min_len=8)
|
||||
|
||||
@@ -202,7 +202,10 @@ make_array(Tp&& arg, Args&&... args)
|
||||
using call_stack_t = std::vector<source_location>;
|
||||
|
||||
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
using host_function_data_t =
|
||||
rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t;
|
||||
using kernel_symbol_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
|
||||
using host_functions_map_t = std::unordered_map<uint64_t, host_function_data_t>;
|
||||
|
||||
rocprofiler_client_id_t* client_id = nullptr;
|
||||
rocprofiler_client_finalize_t client_fini_func = nullptr;
|
||||
@@ -294,6 +297,21 @@ struct kernel_symbol_callback_record_t
|
||||
}
|
||||
};
|
||||
|
||||
struct host_function_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
rocprofiler_callback_tracing_record_t record = {};
|
||||
rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t payload = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("timestamp", timestamp));
|
||||
cereal::save(ar, record);
|
||||
ar(cereal::make_nvp("payload", payload));
|
||||
}
|
||||
};
|
||||
|
||||
struct runtime_init_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
@@ -527,6 +545,7 @@ auto counter_info = std::deque<rocprofiler_counter_info_v0_t>{}
|
||||
auto runtime_init_cb_records = std::deque<runtime_init_callback_record_t>{};
|
||||
auto code_object_records = std::deque<code_object_callback_record_t>{};
|
||||
auto kernel_symbol_records = std::deque<kernel_symbol_callback_record_t>{};
|
||||
auto host_function_records = std::deque<host_function_callback_record_t>{};
|
||||
auto hsa_api_cb_records = std::deque<hsa_api_callback_record_t>{};
|
||||
auto marker_api_cb_records = std::deque<marker_api_callback_record_t>{};
|
||||
auto counter_collection_bf_records = std::deque<profile_counting_record>{};
|
||||
@@ -676,6 +695,13 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
kernel_symbol_records.emplace_back(kernel_symbol_callback_record_t{ts, record, data_v});
|
||||
}
|
||||
else if(record.operation == ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER)
|
||||
{
|
||||
auto data_v = *static_cast<host_function_data_t*>(record.payload);
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
host_function_records.emplace_back(host_function_callback_record_t{ts, record, data_v});
|
||||
}
|
||||
}
|
||||
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API ||
|
||||
record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API ||
|
||||
@@ -1652,6 +1678,7 @@ tool_fini(void* tool_data)
|
||||
<< ", runtime_init_callback_records=" << runtime_init_cb_records.size()
|
||||
<< ", code_object_callback_records=" << code_object_records.size()
|
||||
<< ", kernel_symbol_callback_records=" << kernel_symbol_records.size()
|
||||
<< ", host_function_callback_records=" << host_function_records.size()
|
||||
<< ", hsa_api_callback_records=" << hsa_api_cb_records.size()
|
||||
<< ", hip_api_callback_records=" << hip_api_cb_records.size()
|
||||
<< ", marker_api_callback_records=" << marker_api_cb_records.size()
|
||||
@@ -1760,6 +1787,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("runtime_init", runtime_init_cb_records));
|
||||
json_ar(cereal::make_nvp("code_objects", code_object_records));
|
||||
json_ar(cereal::make_nvp("kernel_symbols", kernel_symbol_records));
|
||||
json_ar(cereal::make_nvp("host_functions", host_function_records));
|
||||
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_cb_records));
|
||||
json_ar(cereal::make_nvp("hip_api_traces", hip_api_cb_records));
|
||||
json_ar(cereal::make_nvp("marker_api_traces", marker_api_cb_records));
|
||||
|
||||
Ссылка в новой задаче
Block a user