From cc0c401615ffb2ec73d669aba7073fcd1cf8899c Mon Sep 17 00:00:00 2001 From: "Trowbridge, Ian" Date: Wed, 12 Feb 2025 12:59:53 -0600 Subject: [PATCH] Memory Allocation Counter Track Shows Total Allocation (#71) * Counter track for memory allocation is now a running sum showing total allocation * Address review comments * Update source/lib/output/generatePerfetto.cpp Co-authored-by: Meserve, Mark * Updated to reflect review comments * Fix compilation errors on CI * remove braces on scalar * Fix struct compilation issues * Removed name_to_id for sanitizer --------- Co-authored-by: Meserve, Mark --- source/include/rocprofiler-sdk/cxx/hash.hpp | 1 + .../include/rocprofiler-sdk/cxx/operators.hpp | 5 + .../rocprofiler-sdk/cxx/serialization.hpp | 6 +- source/include/rocprofiler-sdk/fwd.h | 4 +- source/lib/output/generateCSV.cpp | 2 +- source/lib/output/generatePerfetto.cpp | 189 +++++++++++++----- source/lib/output/metadata.cpp | 4 +- .../code_object/code_object.cpp | 2 +- .../rocprofiler-sdk/hsa/memory_allocation.cpp | 6 +- .../hsa-memory-allocation.cpp | 12 ++ 10 files changed, 166 insertions(+), 65 deletions(-) diff --git a/source/include/rocprofiler-sdk/cxx/hash.hpp b/source/include/rocprofiler-sdk/cxx/hash.hpp index 3268d86309..bee05546ee 100644 --- a/source/include/rocprofiler-sdk/cxx/hash.hpp +++ b/source/include/rocprofiler-sdk/cxx/hash.hpp @@ -58,6 +58,7 @@ struct hash; ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_context_id_t) ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_agent_id_t) +ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_address_t) ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_queue_id_t) ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_buffer_id_t) ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_counter_id_t) diff --git a/source/include/rocprofiler-sdk/cxx/operators.hpp b/source/include/rocprofiler-sdk/cxx/operators.hpp index 41aebadb92..37c3c6655e 100644 --- a/source/include/rocprofiler-sdk/cxx/operators.hpp +++ 
b/source/include/rocprofiler-sdk/cxx/operators.hpp @@ -94,6 +94,7 @@ less(Tp lhs, Tp rhs) // declaration of operator== and operator!= ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_context_id_t) +ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_address_t) ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_agent_id_t) ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_queue_id_t) ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_buffer_id_t) @@ -110,6 +111,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(hsa_amd_memory_pool_t) // definitions of operator== ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t) +ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_address_t) ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_agent_id_t) ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_queue_id_t) ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_buffer_id_t) @@ -136,6 +138,7 @@ operator==(rocprofiler_dim3_t lhs, rocprofiler_dim3_t rhs) // definitions of operator!= ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t) +ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_agent_id_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_queue_id_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_buffer_id_t) @@ -152,6 +155,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_amd_memory_pool_t) // definitions of operator< ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t) +ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_address_t) ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_agent_id_t) ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_queue_id_t) ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_buffer_id_t) @@ -183,6 +187,7 @@ operator<(rocprofiler_dim3_t lhs, rocprofiler_dim3_t rhs) // definitions of operator>, operator<=, operator>= ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t) +ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t) 
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_agent_id_t) ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_queue_id_t) ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_buffer_id_t) diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index 95effccb85..011eb539aa 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -142,7 +142,7 @@ template void save(ArchiveT& ar, rocprofiler_address_t data) { - ROCP_SDK_SAVE_DATA_FIELD(value); + ROCP_SDK_SAVE_DATA_FIELD(handle); } template @@ -367,7 +367,7 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_memory_allocation_data_t data) ROCP_SDK_SAVE_DATA_FIELD(start_timestamp); ROCP_SDK_SAVE_DATA_FIELD(end_timestamp); ROCP_SDK_SAVE_DATA_FIELD(agent_id); - ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.value, 16)); + ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.handle, 16)); ROCP_SDK_SAVE_DATA_FIELD(allocation_size); } @@ -594,7 +594,7 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_allocation_record_t data) ROCP_SDK_SAVE_DATA_FIELD(start_timestamp); ROCP_SDK_SAVE_DATA_FIELD(end_timestamp); ROCP_SDK_SAVE_DATA_FIELD(agent_id); - ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.value, 16)); + ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.handle, 16)); ROCP_SDK_SAVE_DATA_FIELD(allocation_size); } diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index fe3a23968b..ffff002923 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -528,8 +528,8 @@ typedef union rocprofiler_user_data_t */ typedef union rocprofiler_address_t { - uint64_t value; ///< usage example: store address in uint64_t format - void* ptr; ///< usage example: generic form of address + uint64_t handle; ///< 
usage example: store address in uint64_t format + void* ptr; ///< usage example: generic form of address } rocprofiler_address_t; /** diff --git a/source/lib/output/generateCSV.cpp b/source/lib/output/generateCSV.cpp index 243ed12dd5..2326973f64 100644 --- a/source/lib/output/generateCSV.cpp +++ b/source/lib/output/generateCSV.cpp @@ -482,7 +482,7 @@ generate_csv(const output_config& api_name, agent_info, record.allocation_size, - rocprofiler::sdk::utility::as_hex(record.address.value, 16), + rocprofiler::sdk::utility::as_hex(record.address.handle, 16), record.correlation_id.internal, record.start_timestamp, record.end_timestamp); diff --git a/source/lib/output/generatePerfetto.cpp b/source/lib/output/generatePerfetto.cpp index 53185927d9..2d604b712d 100644 --- a/source/lib/output/generatePerfetto.cpp +++ b/source/lib/output/generatePerfetto.cpp @@ -529,20 +529,24 @@ write_perfetto( // counter tracks { // memory copy counter track - auto mem_cpy_endpoints = std::map>{}; - auto mem_cpy_extremes = std::pair{std::numeric_limits::max(), + auto mem_cpy_endpoints = + std::map>{}; + auto mem_cpy_extremes = std::pair{std::numeric_limits::max(), std::numeric_limits::min()}; + auto constexpr timestamp_buffer = 1000; for(auto ditr : memory_copy_gen) for(auto itr : memory_copy_gen.get(ditr)) { uint64_t _mean_timestamp = itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp)); - mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - 1000, 0); + mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - timestamp_buffer, + 0); mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0); mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0); mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0); - mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + 1000, 0); + mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + timestamp_buffer, + 0); mem_cpy_extremes = 
std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp), @@ -563,7 +567,8 @@ write_perfetto( mitr->second += itr.bytes; } - constexpr auto bytes_multiplier = 1024; + constexpr auto bytes_multiplier = 1024; + constexpr auto extremes_endpoint_buffer = 5000; auto mem_cpy_tracks = std::unordered_map{}; @@ -571,8 +576,10 @@ write_perfetto( mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size()); for(auto& mitr : mem_cpy_endpoints) { - mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - 5000, 0); - mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.second + 5000, 0); + mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - extremes_endpoint_buffer, + 0); + mem_cpy_endpoints[mitr.first].emplace( + mem_cpy_extremes.second + extremes_endpoint_buffer, 0); auto _track_name = std::stringstream{}; const auto* _agent = _get_agent(mitr.first); @@ -604,77 +611,153 @@ write_perfetto( } // memory allocation counter track - auto mem_alloc_endpoints = std::map>{}; - auto mem_alloc_extremes = std::pair{ + constexpr auto null_rocp_agent_id = + rocprofiler_agent_id_t{.handle = std::numeric_limits::max()}; + struct free_memory_information + { + rocprofiler_timestamp_t start_timestamp = 0; + rocprofiler_timestamp_t end_timestamp = 0; + rocprofiler_address_t address = {.handle = 0}; + }; + + struct memory_information + { + uint64_t alloc_size = {0}; + rocprofiler_address_t address = {.handle = 0}; + bool is_alloc_op = {false}; + }; + + struct agent_and_size + { + rocprofiler_agent_id_t agent_id = + rocprofiler_agent_id_t{.handle = std::numeric_limits::max()}; + uint64_t size = {0}; + }; + + auto mem_alloc_endpoints = + std::unordered_map>{}; + auto mem_alloc_extremes = std::pair{ std::numeric_limits::max(), std::numeric_limits::min()}; - auto address_to_size = std::unordered_map{}; + auto address_to_agent_and_size = + std::unordered_map{}; + auto free_mem_info = std::vector{}; + + // Load memory allocation endpoints for(auto ditr : memory_allocation_gen) 
for(auto itr : memory_allocation_gen.get(ditr)) { - uint64_t _mean_timestamp = - itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp)); - - mem_alloc_endpoints[itr.agent_id].emplace(itr.start_timestamp - 1000, 0); - mem_alloc_endpoints[itr.agent_id].emplace(itr.start_timestamp, 0); - mem_alloc_endpoints[itr.agent_id].emplace(_mean_timestamp, 0); - mem_alloc_endpoints[itr.agent_id].emplace(itr.end_timestamp, 0); - mem_alloc_endpoints[itr.agent_id].emplace(itr.end_timestamp + 1000, 0); - - mem_alloc_extremes = - std::make_pair(std::min(mem_alloc_extremes.first, itr.start_timestamp), - std::max(mem_alloc_extremes.second, itr.end_timestamp)); if(itr.operation == ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE || itr.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE) { - address_to_size.emplace(itr.address.value, itr.allocation_size); + LOG_IF(FATAL, itr.agent_id == null_rocp_agent_id) + << "Missing agent id for memory allocation trace"; + mem_alloc_endpoints[itr.agent_id].emplace( + itr.start_timestamp, + memory_information{itr.allocation_size, itr.address, true}); + mem_alloc_endpoints[itr.agent_id].emplace( + itr.end_timestamp, + memory_information{itr.allocation_size, itr.address, true}); + address_to_agent_and_size.emplace( + itr.address, agent_and_size{itr.agent_id, itr.allocation_size}); + } + else if(itr.operation == ROCPROFILER_MEMORY_ALLOCATION_FREE || + itr.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_FREE) + { + // Store free memory operations in separate vector to pair with agent + // and allocation size in following loop + free_mem_info.push_back(free_memory_information{ + itr.start_timestamp, itr.end_timestamp, itr.address}); + } + else + { + ROCP_CI_LOG(WARNING) << "unhandled memory allocation type " << itr.operation; } } - - for(auto ditr : memory_allocation_gen) - for(auto itr : memory_allocation_gen.get(ditr)) + // Add free memory operations to the endpoint map + for(const auto& itr : free_mem_info) + { + 
if(address_to_agent_and_size.count(itr.address) == 0) { - auto alloc_beg = - mem_alloc_endpoints.at(itr.agent_id).lower_bound(itr.start_timestamp); - auto alloc_end = - mem_alloc_endpoints.at(itr.agent_id).upper_bound(itr.end_timestamp); - - LOG_IF(FATAL, alloc_beg == alloc_end) - << "Missing range for timestamp [" << itr.start_timestamp << ", " - << itr.end_timestamp << "]"; - - for(auto alloc_itr = alloc_beg; alloc_itr != alloc_end; ++alloc_itr) + if(itr.address.handle == 0) { - if(address_to_size.count(itr.address.value) > 0) + // Freeing null pointers is expected behavior and occurs in HSA functions + // like hipStreamDestroy + ROCP_INFO << "null pointer freed due to HSA operation"; + } + else + { + // Following should not occur + ROCP_INFO << "Unpaired free operation occurred"; + } + continue; + } + auto [agent_id, allocation_size] = address_to_agent_and_size[itr.address]; + mem_alloc_endpoints[agent_id].emplace( + itr.start_timestamp, memory_information{allocation_size, itr.address, false}); + mem_alloc_endpoints[agent_id].emplace( + itr.end_timestamp, memory_information{allocation_size, itr.address, false}); + } + // Create running sum of allocated memory + for(auto& [_, endpoint_map] : mem_alloc_endpoints) + { + if(!endpoint_map.empty()) + { + auto earliest_agent_timestamp = endpoint_map.begin()->first; + auto latest_agent_timestamp = (--endpoint_map.end())->first; + mem_alloc_extremes = + std::make_pair(std::min(mem_alloc_extremes.first, earliest_agent_timestamp), + std::max(mem_alloc_extremes.second, latest_agent_timestamp)); + } + if(endpoint_map.size() <= 1) + { + continue; + } + + auto prev = endpoint_map.begin(); + auto itr = std::next(prev); + for(; itr != endpoint_map.end(); ++itr, ++prev) + { + // If address or allocation type are different, add or subtract from running sum + if(prev->second.address != itr->second.address || + prev->second.is_alloc_op != itr->second.is_alloc_op) + { + if(itr->second.is_alloc_op) { - alloc_itr->second += 
address_to_size.at(itr.address.value); + itr->second.alloc_size += prev->second.alloc_size; + } + else if(prev->second.alloc_size >= itr->second.alloc_size) + { + itr->second.alloc_size = prev->second.alloc_size - itr->second.alloc_size; } } + else + { + itr->second.alloc_size = prev->second.alloc_size; + } } + } auto mem_alloc_tracks = std::unordered_map{}; - auto mem_alloc_cnt_names = std::vector{}; - constexpr auto null_rocp_agent_id = - rocprofiler_agent_id_t{.handle = std::numeric_limits::max()}; + auto mem_alloc_cnt_names = std::vector{}; mem_alloc_cnt_names.reserve(mem_alloc_endpoints.size()); for(auto& alloc_itr : mem_alloc_endpoints) { - mem_alloc_endpoints[alloc_itr.first].emplace(mem_alloc_extremes.first - 5000, 0); - mem_alloc_endpoints[alloc_itr.first].emplace(mem_alloc_extremes.second + 5000, 0); + mem_alloc_endpoints[alloc_itr.first].emplace( + mem_alloc_extremes.first - extremes_endpoint_buffer, + memory_information{0, {0}, false}); + mem_alloc_endpoints[alloc_itr.first].emplace( + mem_alloc_extremes.second + extremes_endpoint_buffer, + memory_information{0, {0}, false}); auto _track_name = std::stringstream{}; - const rocprofiler_agent_t* _agent = nullptr; - if(alloc_itr.first != null_rocp_agent_id) - { - _agent = _get_agent(alloc_itr.first); - } + const rocprofiler_agent_t* _agent = _get_agent(alloc_itr.first); - if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_CPU) + if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) _track_name << "ALLOCATE BYTES on AGENT [" << _agent->logical_node_id << "] (CPU)"; - else if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_GPU) + else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) _track_name << "ALLOCATE BYTES on AGENT [" << _agent->logical_node_id << "] (GPU)"; - else - _track_name << "FREE BYTES"; constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str()); @@ -692,7 +775,7 @@ write_perfetto( 
TRACE_COUNTER(sdk::perfetto_category::name, mem_alloc_tracks.at(alloc_itr.first), itr.first, - itr.second / bytes_multiplier); + itr.second.alloc_size / bytes_multiplier); tracing_session->FlushBlocking(); } } diff --git a/source/lib/output/metadata.cpp b/source/lib/output/metadata.cpp index 61f1532d0c..a79ab335ba 100644 --- a/source/lib/output/metadata.cpp +++ b/source/lib/output/metadata.cpp @@ -75,10 +75,10 @@ query_pc_sampling_configuration(const rocprofiler_pc_sampling_configuration_t* c } // namespace kernel_symbol_info::kernel_symbol_info() -: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, {.value = 0}} +: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, {.handle = 0}} {} -constexpr auto null_address_v = rocprofiler_address_t{.value = 0}; +constexpr auto null_address_v = rocprofiler_address_t{.handle = 0}; constexpr auto null_dim3_v = rocprofiler_dim3_t{.x = 0, .y = 0, .z = 0}; host_function_info::host_function_info() diff --git a/source/lib/rocprofiler-sdk/code_object/code_object.cpp b/source/lib/rocprofiler-sdk/code_object/code_object.cpp index 332e3a15f4..08cfe5958c 100644 --- a/source/lib/rocprofiler-sdk/code_object/code_object.cpp +++ b/source/lib/rocprofiler-sdk/code_object/code_object.cpp @@ -488,7 +488,7 @@ executable_iterate_agent_symbols_load_callback(hsa_executable_t executabl if(CHECK_NOTNULL(code_obj_v) && CHECK_NOTNULL(kernel_descript)) { data.kernel_code_entry_byte_offset = kernel_descript->kernel_code_entry_byte_offset; - data.kernel_address.value = data.kernel_object + data.kernel_code_entry_byte_offset; + data.kernel_address.handle = data.kernel_object + data.kernel_code_entry_byte_offset; if(const auto* rocp_agent = agent::get_agent(code_obj_v->rocp_data.rocp_agent); CHECK_NOTNULL(rocp_agent)) diff --git a/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp b/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp index fadd9c30ba..bedbd3589e 100644 --- a/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp +++ 
b/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp @@ -336,7 +336,7 @@ struct memory_allocation_data rocprofiler_thread_id_t tid = common::get_tid(); rocprofiler_agent_id_t agent = null_rocp_agent_id; uint64_t size_allocated = 0; - rocprofiler_address_t address = {.value = 0}; + rocprofiler_address_t address = {.handle = 0}; uint64_t start_ts = 0; context::correlation_id* correlation_id = nullptr; tracing::tracing_data tracing_data = {}; @@ -428,7 +428,7 @@ handle_starting_addr(void** starting_addr_pointer) rocprofiler_address_t handle_starting_addr(hsa_amd_vmem_alloc_handle_t* vmem_alloc_handle) { - return rocprofiler_address_t{.value = (vmem_alloc_handle) ? vmem_alloc_handle->handle : 0}; + return rocprofiler_address_t{.handle = (vmem_alloc_handle) ? vmem_alloc_handle->handle : 0}; } // Handling starting address for free memory operations @@ -442,7 +442,7 @@ handle_starting_addr(void* starting_addr_pointer) rocprofiler_address_t handle_starting_addr(hsa_amd_vmem_alloc_handle_t vmem_alloc_handle) { - return rocprofiler_address_t{.value = vmem_alloc_handle.handle}; + return rocprofiler_address_t{.handle = vmem_alloc_handle.handle}; } // Wrapper implementation that stores memory allocation information diff --git a/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp b/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp index b3a75cafdd..9f7de38493 100644 --- a/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp +++ b/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp @@ -185,6 +185,8 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age hsa_region_t* ptr_reg = &region_list[0]; status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg); RET_IF_HSA_ERR(status) + auto address_vec = std::vector<void*>{}; + address_vec.reserve(i); for(size_t j = 0; j < i; ++j) { @@ -192,6 +194,10 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age status = hsa_memory_allocate(region_list[0], 
base_size, &addr); RET_IF_HSA_ERR(status) + address_vec.emplace_back(addr); + } + for(void* addr : address_vec) + { status = hsa_memory_free(addr); RET_IF_HSA_ERR(status) } @@ -215,6 +221,8 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_ hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0]; status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool); RET_IF_HSA_ERR(status) + auto address_vec = std::vector<void*>{}; + address_vec.reserve(i); for(size_t j = 0; j < i; ++j) { @@ -223,6 +231,10 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_ status = hsa_amd_memory_pool_allocate(memory_pool_list[0], base_size, flags, &addr); RET_IF_HSA_ERR(status) + address_vec.emplace_back(addr); + } + for(void* addr : address_vec) + { status = hsa_amd_memory_pool_free(addr); RET_IF_HSA_ERR(status) }