Memory Allocation Counter Track Shows Total Allocation (#71)
* Counter track for memory allocation is now a running sum showing total allocation * Address review comments * Update source/lib/output/generatePerfetto.cpp Co-authored-by: Meserve, Mark <Mark.Meserve@amd.com> * Updated to reflect review comments * Fix compilation errors on CI * remove braces on scalar * Fix struct compilation issues * Removed name_to_id for sanitizer --------- Co-authored-by: Meserve, Mark <Mark.Meserve@amd.com>
此提交包含在:
@@ -58,6 +58,7 @@ struct hash;
|
||||
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_counter_id_t)
|
||||
|
||||
@@ -94,6 +94,7 @@ less(Tp lhs, Tp rhs)
|
||||
|
||||
// declaration of operator== and operator!=
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_buffer_id_t)
|
||||
@@ -110,6 +111,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(hsa_amd_memory_pool_t)
|
||||
|
||||
// definitions of operator==
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_buffer_id_t)
|
||||
@@ -136,6 +138,7 @@ operator==(rocprofiler_dim3_t lhs, rocprofiler_dim3_t rhs)
|
||||
|
||||
// definitions of operator!=
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_buffer_id_t)
|
||||
@@ -152,6 +155,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_amd_memory_pool_t)
|
||||
|
||||
// definitions of operator<
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_buffer_id_t)
|
||||
@@ -183,6 +187,7 @@ operator<(rocprofiler_dim3_t lhs, rocprofiler_dim3_t rhs)
|
||||
|
||||
// definitions of operator>, operator<=, operator>=
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_buffer_id_t)
|
||||
|
||||
@@ -142,7 +142,7 @@ template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_address_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_FIELD(value);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(handle);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
@@ -367,7 +367,7 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_memory_allocation_data_t data)
|
||||
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
|
||||
ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.value, 16));
|
||||
ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.handle, 16));
|
||||
ROCP_SDK_SAVE_DATA_FIELD(allocation_size);
|
||||
}
|
||||
|
||||
@@ -594,7 +594,7 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_allocation_record_t data)
|
||||
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
|
||||
ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.value, 16));
|
||||
ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.handle, 16));
|
||||
ROCP_SDK_SAVE_DATA_FIELD(allocation_size);
|
||||
}
|
||||
|
||||
|
||||
@@ -528,8 +528,8 @@ typedef union rocprofiler_user_data_t
|
||||
*/
|
||||
typedef union rocprofiler_address_t
|
||||
{
|
||||
uint64_t value; ///< usage example: store address in uint64_t format
|
||||
void* ptr; ///< usage example: generic form of address
|
||||
uint64_t handle; ///< usage example: store address in uint64_t format
|
||||
void* ptr; ///< usage example: generic form of address
|
||||
} rocprofiler_address_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -482,7 +482,7 @@ generate_csv(const output_config&
|
||||
api_name,
|
||||
agent_info,
|
||||
record.allocation_size,
|
||||
rocprofiler::sdk::utility::as_hex(record.address.value, 16),
|
||||
rocprofiler::sdk::utility::as_hex(record.address.handle, 16),
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
@@ -529,20 +529,24 @@ write_perfetto(
|
||||
// counter tracks
|
||||
{
|
||||
// memory copy counter track
|
||||
auto mem_cpy_endpoints = std::map<rocprofiler_agent_id_t, std::map<uint64_t, uint64_t>>{};
|
||||
auto mem_cpy_extremes = std::pair<uint64_t, uint64_t>{std::numeric_limits<uint64_t>::max(),
|
||||
auto mem_cpy_endpoints =
|
||||
std::map<rocprofiler_agent_id_t, std::map<rocprofiler_timestamp_t, uint64_t>>{};
|
||||
auto mem_cpy_extremes = std::pair<uint64_t, uint64_t>{std::numeric_limits<uint64_t>::max(),
|
||||
std::numeric_limits<uint64_t>::min()};
|
||||
auto constexpr timestamp_buffer = 1000;
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
uint64_t _mean_timestamp =
|
||||
itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp));
|
||||
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - 1000, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - timestamp_buffer,
|
||||
0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + 1000, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + timestamp_buffer,
|
||||
0);
|
||||
|
||||
mem_cpy_extremes =
|
||||
std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp),
|
||||
@@ -563,7 +567,8 @@ write_perfetto(
|
||||
mitr->second += itr.bytes;
|
||||
}
|
||||
|
||||
constexpr auto bytes_multiplier = 1024;
|
||||
constexpr auto bytes_multiplier = 1024;
|
||||
constexpr auto extremes_endpoint_buffer = 5000;
|
||||
|
||||
auto mem_cpy_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
||||
@@ -571,8 +576,10 @@ write_perfetto(
|
||||
mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size());
|
||||
for(auto& mitr : mem_cpy_endpoints)
|
||||
{
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - 5000, 0);
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.second + 5000, 0);
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - extremes_endpoint_buffer,
|
||||
0);
|
||||
mem_cpy_endpoints[mitr.first].emplace(
|
||||
mem_cpy_extremes.second + extremes_endpoint_buffer, 0);
|
||||
|
||||
auto _track_name = std::stringstream{};
|
||||
const auto* _agent = _get_agent(mitr.first);
|
||||
@@ -604,77 +611,153 @@ write_perfetto(
|
||||
}
|
||||
|
||||
// memory allocation counter track
|
||||
auto mem_alloc_endpoints = std::map<rocprofiler_agent_id_t, std::map<uint64_t, uint64_t>>{};
|
||||
auto mem_alloc_extremes = std::pair<uint64_t, uint64_t>{
|
||||
constexpr auto null_rocp_agent_id =
|
||||
rocprofiler_agent_id_t{.handle = std::numeric_limits<uint64_t>::max()};
|
||||
struct free_memory_information
|
||||
{
|
||||
rocprofiler_timestamp_t start_timestamp = 0;
|
||||
rocprofiler_timestamp_t end_timestamp = 0;
|
||||
rocprofiler_address_t address = {.handle = 0};
|
||||
};
|
||||
|
||||
struct memory_information
|
||||
{
|
||||
uint64_t alloc_size = {0};
|
||||
rocprofiler_address_t address = {.handle = 0};
|
||||
bool is_alloc_op = {false};
|
||||
};
|
||||
|
||||
struct agent_and_size
|
||||
{
|
||||
rocprofiler_agent_id_t agent_id =
|
||||
rocprofiler_agent_id_t{.handle = std::numeric_limits<uint64_t>::max()};
|
||||
uint64_t size = {0};
|
||||
};
|
||||
|
||||
auto mem_alloc_endpoints =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::map<rocprofiler_timestamp_t, memory_information>>{};
|
||||
auto mem_alloc_extremes = std::pair<uint64_t, uint64_t>{
|
||||
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};
|
||||
auto address_to_size = std::unordered_map<uint64_t, uint64_t>{};
|
||||
auto address_to_agent_and_size =
|
||||
std::unordered_map<rocprofiler_address_t, agent_and_size>{};
|
||||
auto free_mem_info = std::vector<free_memory_information>{};
|
||||
|
||||
// Load memory allocation endpoints
|
||||
for(auto ditr : memory_allocation_gen)
|
||||
for(auto itr : memory_allocation_gen.get(ditr))
|
||||
{
|
||||
uint64_t _mean_timestamp =
|
||||
itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp));
|
||||
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(itr.start_timestamp - 1000, 0);
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(itr.start_timestamp, 0);
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(_mean_timestamp, 0);
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(itr.end_timestamp, 0);
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(itr.end_timestamp + 1000, 0);
|
||||
|
||||
mem_alloc_extremes =
|
||||
std::make_pair(std::min(mem_alloc_extremes.first, itr.start_timestamp),
|
||||
std::max(mem_alloc_extremes.second, itr.end_timestamp));
|
||||
if(itr.operation == ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE ||
|
||||
itr.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE)
|
||||
{
|
||||
address_to_size.emplace(itr.address.value, itr.allocation_size);
|
||||
LOG_IF(FATAL, itr.agent_id == null_rocp_agent_id)
|
||||
<< "Missing agent id for memory allocation trace";
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(
|
||||
itr.start_timestamp,
|
||||
memory_information{itr.allocation_size, itr.address, true});
|
||||
mem_alloc_endpoints[itr.agent_id].emplace(
|
||||
itr.end_timestamp,
|
||||
memory_information{itr.allocation_size, itr.address, true});
|
||||
address_to_agent_and_size.emplace(
|
||||
itr.address, agent_and_size{itr.agent_id, itr.allocation_size});
|
||||
}
|
||||
else if(itr.operation == ROCPROFILER_MEMORY_ALLOCATION_FREE ||
|
||||
itr.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_FREE)
|
||||
{
|
||||
// Store free memory operations in seperate vector to pair with agent
|
||||
// and allocation size in following loop
|
||||
free_mem_info.push_back(free_memory_information{
|
||||
itr.start_timestamp, itr.end_timestamp, itr.address});
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCP_CI_LOG(WARNING) << "unhandled memory allocation type " << itr.operation;
|
||||
}
|
||||
}
|
||||
|
||||
for(auto ditr : memory_allocation_gen)
|
||||
for(auto itr : memory_allocation_gen.get(ditr))
|
||||
// Add free memory operations to the endpoint map
|
||||
for(const auto& itr : free_mem_info)
|
||||
{
|
||||
if(address_to_agent_and_size.count(itr.address) == 0)
|
||||
{
|
||||
auto alloc_beg =
|
||||
mem_alloc_endpoints.at(itr.agent_id).lower_bound(itr.start_timestamp);
|
||||
auto alloc_end =
|
||||
mem_alloc_endpoints.at(itr.agent_id).upper_bound(itr.end_timestamp);
|
||||
|
||||
LOG_IF(FATAL, alloc_beg == alloc_end)
|
||||
<< "Missing range for timestamp [" << itr.start_timestamp << ", "
|
||||
<< itr.end_timestamp << "]";
|
||||
|
||||
for(auto alloc_itr = alloc_beg; alloc_itr != alloc_end; ++alloc_itr)
|
||||
if(itr.address.handle == 0)
|
||||
{
|
||||
if(address_to_size.count(itr.address.value) > 0)
|
||||
// Freeing null pointers is expected behavior and is occurs in HSA functions
|
||||
// like hipStreamDestroy
|
||||
ROCP_INFO << "null pointer freed due to HSA operation";
|
||||
}
|
||||
else
|
||||
{
|
||||
// Following should not occur
|
||||
ROCP_INFO << "Unpaired free operation occurred";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
auto [agent_id, allocation_size] = address_to_agent_and_size[itr.address];
|
||||
mem_alloc_endpoints[agent_id].emplace(
|
||||
itr.start_timestamp, memory_information{allocation_size, itr.address, false});
|
||||
mem_alloc_endpoints[agent_id].emplace(
|
||||
itr.end_timestamp, memory_information{allocation_size, itr.address, false});
|
||||
}
|
||||
// Create running sum of allocated memory
|
||||
for(auto& [_, endpoint_map] : mem_alloc_endpoints)
|
||||
{
|
||||
if(!endpoint_map.empty())
|
||||
{
|
||||
auto earliest_agent_timestamp = endpoint_map.begin()->first;
|
||||
auto latest_agent_timestamp = (--endpoint_map.end())->first;
|
||||
mem_alloc_extremes =
|
||||
std::make_pair(std::min(mem_alloc_extremes.first, earliest_agent_timestamp),
|
||||
std::max(mem_alloc_extremes.second, latest_agent_timestamp));
|
||||
}
|
||||
if(endpoint_map.size() <= 1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
auto prev = endpoint_map.begin();
|
||||
auto itr = std::next(prev);
|
||||
for(; itr != endpoint_map.end(); ++itr, ++prev)
|
||||
{
|
||||
// If address or allocation type are different, add or subtract from running sum
|
||||
if(prev->second.address != itr->second.address ||
|
||||
prev->second.is_alloc_op != itr->second.is_alloc_op)
|
||||
{
|
||||
if(itr->second.is_alloc_op)
|
||||
{
|
||||
alloc_itr->second += address_to_size.at(itr.address.value);
|
||||
itr->second.alloc_size += prev->second.alloc_size;
|
||||
}
|
||||
else if(prev->second.alloc_size >= itr->second.alloc_size)
|
||||
{
|
||||
itr->second.alloc_size = prev->second.alloc_size - itr->second.alloc_size;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
itr->second.alloc_size = prev->second.alloc_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_alloc_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
||||
auto mem_alloc_cnt_names = std::vector<std::string>{};
|
||||
constexpr auto null_rocp_agent_id =
|
||||
rocprofiler_agent_id_t{.handle = std::numeric_limits<uint64_t>::max()};
|
||||
auto mem_alloc_cnt_names = std::vector<std::string>{};
|
||||
mem_alloc_cnt_names.reserve(mem_alloc_endpoints.size());
|
||||
for(auto& alloc_itr : mem_alloc_endpoints)
|
||||
{
|
||||
mem_alloc_endpoints[alloc_itr.first].emplace(mem_alloc_extremes.first - 5000, 0);
|
||||
mem_alloc_endpoints[alloc_itr.first].emplace(mem_alloc_extremes.second + 5000, 0);
|
||||
mem_alloc_endpoints[alloc_itr.first].emplace(
|
||||
mem_alloc_extremes.first - extremes_endpoint_buffer,
|
||||
memory_information{0, {0}, false});
|
||||
mem_alloc_endpoints[alloc_itr.first].emplace(
|
||||
mem_alloc_extremes.second + extremes_endpoint_buffer,
|
||||
memory_information{0, {0}, false});
|
||||
|
||||
auto _track_name = std::stringstream{};
|
||||
const rocprofiler_agent_t* _agent = nullptr;
|
||||
if(alloc_itr.first != null_rocp_agent_id)
|
||||
{
|
||||
_agent = _get_agent(alloc_itr.first);
|
||||
}
|
||||
const rocprofiler_agent_t* _agent = _get_agent(alloc_itr.first);
|
||||
|
||||
if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_track_name << "ALLOCATE BYTES on AGENT [" << _agent->logical_node_id << "] (CPU)";
|
||||
else if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_track_name << "ALLOCATE BYTES on AGENT [" << _agent->logical_node_id << "] (GPU)";
|
||||
else
|
||||
_track_name << "FREE BYTES";
|
||||
|
||||
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
||||
auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str());
|
||||
@@ -692,7 +775,7 @@ write_perfetto(
|
||||
TRACE_COUNTER(sdk::perfetto_category<sdk::category::memory_allocation>::name,
|
||||
mem_alloc_tracks.at(alloc_itr.first),
|
||||
itr.first,
|
||||
itr.second / bytes_multiplier);
|
||||
itr.second.alloc_size / bytes_multiplier);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,10 +75,10 @@ query_pc_sampling_configuration(const rocprofiler_pc_sampling_configuration_t* c
|
||||
} // namespace
|
||||
|
||||
kernel_symbol_info::kernel_symbol_info()
|
||||
: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, {.value = 0}}
|
||||
: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, {.handle = 0}}
|
||||
{}
|
||||
|
||||
constexpr auto null_address_v = rocprofiler_address_t{.value = 0};
|
||||
constexpr auto null_address_v = rocprofiler_address_t{.handle = 0};
|
||||
constexpr auto null_dim3_v = rocprofiler_dim3_t{.x = 0, .y = 0, .z = 0};
|
||||
|
||||
host_function_info::host_function_info()
|
||||
|
||||
@@ -488,7 +488,7 @@ executable_iterate_agent_symbols_load_callback(hsa_executable_t executabl
|
||||
if(CHECK_NOTNULL(code_obj_v) && CHECK_NOTNULL(kernel_descript))
|
||||
{
|
||||
data.kernel_code_entry_byte_offset = kernel_descript->kernel_code_entry_byte_offset;
|
||||
data.kernel_address.value = data.kernel_object + data.kernel_code_entry_byte_offset;
|
||||
data.kernel_address.handle = data.kernel_object + data.kernel_code_entry_byte_offset;
|
||||
|
||||
if(const auto* rocp_agent = agent::get_agent(code_obj_v->rocp_data.rocp_agent);
|
||||
CHECK_NOTNULL(rocp_agent))
|
||||
|
||||
@@ -336,7 +336,7 @@ struct memory_allocation_data
|
||||
rocprofiler_thread_id_t tid = common::get_tid();
|
||||
rocprofiler_agent_id_t agent = null_rocp_agent_id;
|
||||
uint64_t size_allocated = 0;
|
||||
rocprofiler_address_t address = {.value = 0};
|
||||
rocprofiler_address_t address = {.handle = 0};
|
||||
uint64_t start_ts = 0;
|
||||
context::correlation_id* correlation_id = nullptr;
|
||||
tracing::tracing_data tracing_data = {};
|
||||
@@ -428,7 +428,7 @@ handle_starting_addr(void** starting_addr_pointer)
|
||||
rocprofiler_address_t
|
||||
handle_starting_addr(hsa_amd_vmem_alloc_handle_t* vmem_alloc_handle)
|
||||
{
|
||||
return rocprofiler_address_t{.value = (vmem_alloc_handle) ? vmem_alloc_handle->handle : 0};
|
||||
return rocprofiler_address_t{.handle = (vmem_alloc_handle) ? vmem_alloc_handle->handle : 0};
|
||||
}
|
||||
|
||||
// Handling starting address for free memory operations
|
||||
@@ -442,7 +442,7 @@ handle_starting_addr(void* starting_addr_pointer)
|
||||
rocprofiler_address_t
|
||||
handle_starting_addr(hsa_amd_vmem_alloc_handle_t vmem_alloc_handle)
|
||||
{
|
||||
return rocprofiler_address_t{.value = vmem_alloc_handle.handle};
|
||||
return rocprofiler_address_t{.handle = vmem_alloc_handle.handle};
|
||||
}
|
||||
|
||||
// Wrapper implementation that stores memory allocation information
|
||||
|
||||
@@ -185,6 +185,8 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age
|
||||
hsa_region_t* ptr_reg = ®ion_list[0];
|
||||
status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
|
||||
RET_IF_HSA_ERR(status)
|
||||
auto address_vec = std::vector<void*>{};
|
||||
address_vec.reserve(i);
|
||||
|
||||
for(size_t j = 0; j < i; ++j)
|
||||
{
|
||||
@@ -192,6 +194,10 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age
|
||||
|
||||
status = hsa_memory_allocate(region_list[0], base_size, &addr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
address_vec.emplace_back(addr);
|
||||
}
|
||||
for(void* addr : address_vec)
|
||||
{
|
||||
status = hsa_memory_free(addr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
}
|
||||
@@ -215,6 +221,8 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_
|
||||
hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0];
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool);
|
||||
RET_IF_HSA_ERR(status)
|
||||
auto address_vec = std::vector<void*>{};
|
||||
address_vec.reserve(i);
|
||||
|
||||
for(size_t j = 0; j < i; ++j)
|
||||
{
|
||||
@@ -223,6 +231,10 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_
|
||||
|
||||
status = hsa_amd_memory_pool_allocate(memory_pool_list[0], base_size, flags, &addr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
address_vec.emplace_back(addr);
|
||||
}
|
||||
for(void* addr : address_vec)
|
||||
{
|
||||
status = hsa_amd_memory_pool_free(addr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
}
|
||||
|
||||
新增問題並參考
封鎖使用者