From d04f7095f4765c2bd12bad8e84c8bfb714f1753b Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 20 Apr 2018 11:15:26 -0500 Subject: [PATCH] HSA memory alloc/copy/free refactoring --- src/core/context.h | 4 +-- src/core/profile.h | 4 +-- src/util/hsa_rsrc_factory.cpp | 18 +++++++++---- src/util/hsa_rsrc_factory.h | 7 +++-- test/CMakeLists.txt | 2 +- test/ctrl/test_hsa.cpp | 7 +++-- test/ctrl/test_kernel.h | 9 ++++++- test/ctrl/tool.cpp | 47 +++++++++++++++++++++++++--------- test/util/hsa_rsrc_factory.cpp | 18 +++++++++---- test/util/hsa_rsrc_factory.h | 7 +++-- 10 files changed, 87 insertions(+), 36 deletions(-) diff --git a/src/core/context.h b/src/core/context.h index a7a101bbb7..60ebed61b1 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -446,8 +446,8 @@ class Context { else EXC_RAISING(HSA_STATUS_ERROR, "SQTT data out of output buffer"); } - hsa_status_t status = hsa_memory_copy(dest, src, size); - if (status == HSA_STATUS_SUCCESS) { + const bool suc = util::HsaRsrcFactory::CopyToHost(dest, src, size); + if (suc) { *header = size; callback_data->ptr = dest + align_size(size, sizeof(uint32_t)); rinfo->data.result_bytes.instance_count = sample_id + 1; diff --git a/src/core/profile.h b/src/core/profile.h index 71ad2289b7..79c68e6ef6 100644 --- a/src/core/profile.h +++ b/src/core/profile.h @@ -88,8 +88,8 @@ class Profile { } virtual ~Profile() { info_vector_.clear(); - if (profile_.command_buffer.ptr) hsa_memory_free(profile_.command_buffer.ptr); - if (profile_.output_buffer.ptr) hsa_memory_free(profile_.output_buffer.ptr); + if (profile_.command_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.command_buffer.ptr); + if (profile_.output_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.output_buffer.ptr); if (profile_.events) free(const_cast(profile_.events)); if (profile_.parameters) free(const_cast(profile_.parameters)); if (completion_signal_.handle) { diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index fc64b072ef..e77580f65c 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -331,6 +331,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); } + CHECK_STATUS("hsa_memory_allocate", status); return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; } @@ -348,14 +349,21 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); + CHECK_STATUS("hsa_memory_allocate", status); return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; } -// Transfer data method -bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length, - bool host_to_dev) { - hsa_status_t status; - status = hsa_memory_copy(dest_buff, src_buff, length); +// Memcopy method +bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) { + const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length); + CHECK_STATUS("hsa_memory_copy", status); + return (status == HSA_STATUS_SUCCESS); +} + +// Free method +bool HsaRsrcFactory::MemoryFree(void* ptr) { + const hsa_status_t status = hsa_memory_free(ptr); + CHECK_STATUS("hsa_memory_free", status); return (status == HSA_STATUS_SUCCESS); } diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index a4b5fa3620..40f8c165bf 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -215,8 +215,11 @@ class HsaRsrcFactory { // uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); - // Transfer data method - bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev); + // Memcopy method + static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length); + + // Free method + static bool MemoryFree(void* ptr); // Loads an Assembled Brig file and Finalizes it into Device Isa // diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f484403939..795325ab37 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -34,7 +34,7 @@ execute_process ( COMMAND sh -xc "mkdir -p ${PROJECT_BINARY_DIR}/RESULTS" ) ## Build test library set ( TEST_LIB "tool" ) -set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp ) +set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp ${UTIL_SRC} ) add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} ) target_include_directories ( ${TEST_LIB} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) target_link_libraries( ${TEST_LIB} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic ) diff --git a/test/ctrl/test_hsa.cpp b/test/ctrl/test_hsa.cpp index c2e4536449..edcfd570b2 100644 --- a/test/ctrl/test_hsa.cpp +++ b/test/ctrl/test_hsa.cpp @@ -214,11 +214,10 @@ bool TestHsa::Run() { total_time_taken_ += dispatch_time_taken_; // Copy kernel buffers from local memory into system memory - hsa_rsrc_->TransferData(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize(), - false); - test_->PrintOutput(); + const bool suc = hsa_rsrc_->CopyToHost(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize()); + if (suc) test_->PrintOutput(); - return true; + return suc; } bool TestHsa::VerifyResults() { diff --git a/test/ctrl/test_kernel.h b/test/ctrl/test_kernel.h index 5427ba3144..8bdbe1a525 100644 --- a/test/ctrl/test_kernel.h +++ b/test/ctrl/test_kernel.h @@ -31,6 +31,8 @@ OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include "util/hsa_rsrc_factory.h" + // Class implements kernel test class TestKernel { public: @@ -49,7 +51,12 @@ class TestKernel { typedef mem_map_t::iterator mem_it_t; typedef mem_map_t::const_iterator mem_const_it_t; - virtual ~TestKernel() {} + virtual ~TestKernel() { + for (auto& entry : mem_map_) { + void* ptr = entry.second.ptr; + if (ptr != NULL) HsaRsrcFactory::MemoryFree(ptr); + } + } // Initialize method virtual void Init() = 0; diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index cd1923d1a5..b56301243e 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include /* For SYS_xxx definitions */ #include #include @@ -22,6 +23,7 @@ #include #include "inc/rocprofiler.h" +#include "util/hsa_rsrc_factory.h" #include "util/xml.h" #define PUBLIC_API __attribute__((visibility("default"))) @@ -45,6 +47,7 @@ struct callbacks_data_t { struct context_entry_t { uint32_t valid; uint32_t index; + hsa_agent_t agent; rocprofiler_group_t group; rocprofiler_feature_t* features; unsigned feature_count; @@ -251,6 +254,7 @@ void dump_sqtt_trace(const char* label, const uint32_t chunk, const void* data, struct trace_data_arg_t { FILE* file; const char* label; + hsa_agent_t agent; }; // Trace data callback for getting trace data from GPU local mamory @@ -259,8 +263,19 @@ hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type, hsa_status_t status = HSA_STATUS_SUCCESS; trace_data_arg_t* arg = reinterpret_cast(data); if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) { - fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size); - dump_sqtt_trace(arg->label, info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size); + const uint32_t data_size = info_data->sqtt_data.size; + const void* data_ptr = info_data->sqtt_data.ptr; + fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, data_size); +#if 1 + dump_sqtt_trace(arg->label, info_data->sample_id, data_ptr, data_size); +#else + void* buffer = malloc(data_size); + memset(buffer, 0, data_size); + const bool suc = HsaRsrcFactory::Instance().CopyToHost(arg->agent, buffer, data_ptr, data_size); + if (suc) dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size); + else fatal("SQTT data memcopy to host failed"); + free(buffer); +#endif } else status = HSA_STATUS_ERROR; return status; @@ -272,8 +287,12 @@ unsigned align_size(unsigned size, unsigned alignment) { } // Output profiling results for input features -void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count, - rocprofiler_t* context, const char* label) { +void output_results(const context_entry_t* entry, const char* label) { + FILE* file = entry->file_handle; + const rocprofiler_feature_t* features = entry->features; + const unsigned feature_count = entry->feature_count; + rocprofiler_t* context = entry->group.context; + for (unsigned i = 0; i < feature_count; ++i) { const rocprofiler_feature_t* p = &features[i]; fprintf(file, " %s ", p->name); @@ -298,13 +317,14 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns const uint32_t off = align_size(chunk_size, sizeof(uint32_t)); ptr = chunk_data + off; if (chunk_data >= end) fatal("SQTT data ptr is out of the result buffer size"); + size += chunk_size; } fprintf(file, "size(%lu)\n", size); free(p->data.result_bytes.ptr); const_cast(p)->data.result_bytes.size = 0; } else { fprintf(file, "(\n"); - trace_data_arg_t trace_data_arg{file, label}; + trace_data_arg_t trace_data_arg{file, label, entry->agent}; hsa_status_t status = rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast(&trace_data_arg)); check_status(status); fprintf(file, " )\n"); @@ -319,10 +339,14 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns } // Output group intermeadate profiling results, created internally for complex metrics -void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) { +void output_group(const context_entry_t* entry, const char* label) { + const rocprofiler_group_t* group = &(entry->group); + context_entry_t group_entry = *entry; for (unsigned i = 0; i < group->feature_count; ++i) { if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64) { - output_results(file, group->features[i], 1, group->context, str); + group_entry.features = group->features[i]; + group_entry.feature_count = 1; + output_results(&group_entry, label); } } } @@ -341,11 +365,9 @@ bool dump_context(context_entry_t* entry) { } ++context_collected; + const uint32_t index = entry->index; FILE* file_handle = entry->file_handle; - const rocprofiler_feature_t* features = entry->features; - const unsigned feature_count = entry->feature_count; - const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name); fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\")", @@ -369,13 +391,13 @@ bool dump_context(context_entry_t* entry) { if (group.context != NULL) { status = rocprofiler_group_get_data(&group); check_status(status); - if (verbose == 1) output_group(file_handle, &group, "group0-data"); + if (verbose == 1) output_group(entry, "group0-data"); status = rocprofiler_get_metrics(group.context); check_status(status); std::ostringstream oss; oss << index << "__" << filtr_kernel_name(entry->data.kernel_name); - output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str()); + output_results(entry, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str()); free(const_cast(entry->data.kernel_name)); // Finishing cleanup @@ -545,6 +567,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, } // Fill profiling context entry + entry->agent = callback_data->agent; entry->group = *group; entry->features = tool_data->features; entry->feature_count = tool_data->feature_count; diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp index 88862801d8..7c257e5e93 100644 --- a/test/util/hsa_rsrc_factory.cpp +++ b/test/util/hsa_rsrc_factory.cpp @@ -328,6 +328,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); } + CHECK_STATUS("hsa_memory_allocate", status); return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; } @@ -345,14 +346,21 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); + CHECK_STATUS("hsa_memory_allocate", status); return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; } -// Transfer data method -bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length, - bool host_to_dev) { - hsa_status_t status; - status = hsa_memory_copy(dest_buff, src_buff, length); +// Memcopy method +bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) { + const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length); + CHECK_STATUS("hsa_memory_copy", status); + return (status == HSA_STATUS_SUCCESS); +} + +// Free method +bool HsaRsrcFactory::MemoryFree(void* ptr) { + const hsa_status_t status = hsa_memory_free(ptr); + CHECK_STATUS("hsa_memory_free", status); return (status == HSA_STATUS_SUCCESS); } diff --git a/test/util/hsa_rsrc_factory.h b/test/util/hsa_rsrc_factory.h index d5c10879e0..f5bd947dbc 100644 --- a/test/util/hsa_rsrc_factory.h +++ b/test/util/hsa_rsrc_factory.h @@ -213,8 +213,11 @@ class HsaRsrcFactory { // uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); - // Transfer data method - bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev); + // Memcopy method + static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length); + + // Free method + static bool MemoryFree(void* ptr); // Loads an Assembled Brig file and Finalizes it into Device Isa //