HSA memory alloc/copy/free refactoring
[ROCm/rocprofiler commit: d04f7095f4]
Bu işleme şunda yer alıyor:
@@ -446,8 +446,8 @@ class Context {
|
||||
else EXC_RAISING(HSA_STATUS_ERROR, "SQTT data out of output buffer");
|
||||
}
|
||||
|
||||
hsa_status_t status = hsa_memory_copy(dest, src, size);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
const bool suc = util::HsaRsrcFactory::CopyToHost(dest, src, size);
|
||||
if (suc) {
|
||||
*header = size;
|
||||
callback_data->ptr = dest + align_size(size, sizeof(uint32_t));
|
||||
rinfo->data.result_bytes.instance_count = sample_id + 1;
|
||||
|
||||
@@ -88,8 +88,8 @@ class Profile {
|
||||
}
|
||||
virtual ~Profile() {
|
||||
info_vector_.clear();
|
||||
if (profile_.command_buffer.ptr) hsa_memory_free(profile_.command_buffer.ptr);
|
||||
if (profile_.output_buffer.ptr) hsa_memory_free(profile_.output_buffer.ptr);
|
||||
if (profile_.command_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.command_buffer.ptr);
|
||||
if (profile_.output_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.output_buffer.ptr);
|
||||
if (profile_.events) free(const_cast<event_t*>(profile_.events));
|
||||
if (profile_.parameters) free(const_cast<parameter_t*>(profile_.parameters));
|
||||
if (completion_signal_.handle) {
|
||||
|
||||
@@ -331,6 +331,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
}
|
||||
|
||||
CHECK_STATUS("hsa_memory_allocate", status);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
@@ -348,14 +349,21 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
|
||||
|
||||
uint8_t* buffer = NULL;
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
CHECK_STATUS("hsa_memory_allocate", status);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Transfer data method
|
||||
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
|
||||
bool host_to_dev) {
|
||||
hsa_status_t status;
|
||||
status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
// Memcopy method.
// Copies `length` bytes from src_buff to dest_buff with hsa_memory_copy.
// The resulting status is handed to CHECK_STATUS (defined outside this
// excerpt) and the function returns true only for HSA_STATUS_SUCCESS.
// NOTE(review): the name suggests dest_buff is host memory, but nothing here
// enforces that - confirm with callers. `length` is 32-bit, so a wider size
// passed by a caller is narrowed before the copy.
|
||||
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
|
||||
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
CHECK_STATUS("hsa_memory_copy", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Free method.
// Releases `ptr` with hsa_memory_free. The status is handed to CHECK_STATUS
// (defined outside this excerpt); returns true only for HSA_STATUS_SUCCESS.
// The pointer is forwarded unchecked, so filtering out null is left to the
// caller.
|
||||
bool HsaRsrcFactory::MemoryFree(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
@@ -215,8 +215,11 @@ class HsaRsrcFactory {
|
||||
//
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Transfer data method
|
||||
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
|
||||
// Memcopy method
|
||||
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
|
||||
|
||||
// Free method
|
||||
static bool MemoryFree(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
|
||||
@@ -34,7 +34,7 @@ execute_process ( COMMAND sh -xc "mkdir -p ${PROJECT_BINARY_DIR}/RESULTS" )
|
||||
|
||||
## Build test library
|
||||
set ( TEST_LIB "tool" )
|
||||
set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp )
|
||||
set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp ${UTIL_SRC} )
|
||||
add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} )
|
||||
target_include_directories ( ${TEST_LIB} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${TEST_LIB} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
|
||||
|
||||
@@ -214,11 +214,10 @@ bool TestHsa::Run() {
|
||||
total_time_taken_ += dispatch_time_taken_;
|
||||
|
||||
// Copy kernel buffers from local memory into system memory
|
||||
hsa_rsrc_->TransferData(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize(),
|
||||
false);
|
||||
test_->PrintOutput();
|
||||
const bool suc = hsa_rsrc_->CopyToHost(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize());
|
||||
if (suc) test_->PrintOutput();
|
||||
|
||||
return true;
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool TestHsa::VerifyResults() {
|
||||
|
||||
@@ -31,6 +31,8 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <stdint.h>
|
||||
#include <map>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
// Class implements kernel test
|
||||
class TestKernel {
|
||||
public:
|
||||
@@ -49,7 +51,12 @@ class TestKernel {
|
||||
typedef mem_map_t::iterator mem_it_t;
|
||||
typedef mem_map_t::const_iterator mem_const_it_t;
|
||||
|
||||
virtual ~TestKernel() {}
|
||||
// Destructor: walks mem_map_ and hands every non-NULL entry pointer to
// HsaRsrcFactory::MemoryFree. The boolean result of each free is ignored.
virtual ~TestKernel() {
|
||||
for (auto& entry : mem_map_) {
|
||||
void* ptr = entry.second.ptr;
|
||||
// Entries whose pointer is NULL are skipped
if (ptr != NULL) HsaRsrcFactory::MemoryFree(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize method
|
||||
virtual void Init() = 0;
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
@@ -22,6 +23,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/xml.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
@@ -45,6 +47,7 @@ struct callbacks_data_t {
|
||||
struct context_entry_t {
|
||||
uint32_t valid;
|
||||
uint32_t index;
|
||||
hsa_agent_t agent;
|
||||
rocprofiler_group_t group;
|
||||
rocprofiler_feature_t* features;
|
||||
unsigned feature_count;
|
||||
@@ -251,6 +254,7 @@ void dump_sqtt_trace(const char* label, const uint32_t chunk, const void* data,
|
||||
// Argument bundle passed to trace_data_cb through its void* data parameter
struct trace_data_arg_t {
|
||||
// Stream the callback prints the per-sample "SE(..) size(..)" line to
FILE* file;
|
||||
// Label forwarded to dump_sqtt_trace
const char* label;
|
||||
// Agent of the profiled context entry; only read by the currently disabled
// (#else) CopyToHost path in trace_data_cb
hsa_agent_t agent;
|
||||
};
|
||||
|
||||
// Trace data callback for getting trace data from GPU local memory
|
||||
@@ -259,8 +263,19 @@ hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
trace_data_arg_t* arg = reinterpret_cast<trace_data_arg_t*>(data);
|
||||
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
|
||||
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size);
|
||||
dump_sqtt_trace(arg->label, info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size);
|
||||
const uint32_t data_size = info_data->sqtt_data.size;
|
||||
const void* data_ptr = info_data->sqtt_data.ptr;
|
||||
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, data_size);
|
||||
#if 1
|
||||
dump_sqtt_trace(arg->label, info_data->sample_id, data_ptr, data_size);
|
||||
#else
|
||||
void* buffer = malloc(data_size);
|
||||
memset(buffer, 0, data_size);
|
||||
const bool suc = HsaRsrcFactory::Instance().CopyToHost(arg->agent, buffer, data_ptr, data_size);
|
||||
if (suc) dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size);
|
||||
else fatal("SQTT data memcopy to host failed");
|
||||
free(buffer);
|
||||
#endif
|
||||
} else
|
||||
status = HSA_STATUS_ERROR;
|
||||
return status;
|
||||
@@ -272,8 +287,12 @@ unsigned align_size(unsigned size, unsigned alignment) {
|
||||
}
|
||||
|
||||
// Output profiling results for input features
|
||||
void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count,
|
||||
rocprofiler_t* context, const char* label) {
|
||||
void output_results(const context_entry_t* entry, const char* label) {
|
||||
FILE* file = entry->file_handle;
|
||||
const rocprofiler_feature_t* features = entry->features;
|
||||
const unsigned feature_count = entry->feature_count;
|
||||
rocprofiler_t* context = entry->group.context;
|
||||
|
||||
for (unsigned i = 0; i < feature_count; ++i) {
|
||||
const rocprofiler_feature_t* p = &features[i];
|
||||
fprintf(file, " %s ", p->name);
|
||||
@@ -298,13 +317,14 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
|
||||
const uint32_t off = align_size(chunk_size, sizeof(uint32_t));
|
||||
ptr = chunk_data + off;
|
||||
if (chunk_data >= end) fatal("SQTT data ptr is out of the result buffer size");
|
||||
size += chunk_size;
|
||||
}
|
||||
fprintf(file, "size(%lu)\n", size);
|
||||
free(p->data.result_bytes.ptr);
|
||||
const_cast<rocprofiler_feature_t*>(p)->data.result_bytes.size = 0;
|
||||
} else {
|
||||
fprintf(file, "(\n");
|
||||
trace_data_arg_t trace_data_arg{file, label};
|
||||
trace_data_arg_t trace_data_arg{file, label, entry->agent};
|
||||
hsa_status_t status = rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast<void*>(&trace_data_arg));
|
||||
check_status(status);
|
||||
fprintf(file, " )\n");
|
||||
@@ -319,10 +339,14 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
|
||||
}
|
||||
|
||||
// Output group intermediate profiling results, created internally for complex metrics
|
||||
void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
|
||||
void output_group(const context_entry_t* entry, const char* label) {
|
||||
const rocprofiler_group_t* group = &(entry->group);
|
||||
context_entry_t group_entry = *entry;
|
||||
for (unsigned i = 0; i < group->feature_count; ++i) {
|
||||
if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64) {
|
||||
output_results(file, group->features[i], 1, group->context, str);
|
||||
group_entry.features = group->features[i];
|
||||
group_entry.feature_count = 1;
|
||||
output_results(&group_entry, label);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -341,11 +365,9 @@ bool dump_context(context_entry_t* entry) {
|
||||
}
|
||||
|
||||
++context_collected;
|
||||
|
||||
const uint32_t index = entry->index;
|
||||
FILE* file_handle = entry->file_handle;
|
||||
const rocprofiler_feature_t* features = entry->features;
|
||||
const unsigned feature_count = entry->feature_count;
|
||||
|
||||
const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name);
|
||||
|
||||
fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\")",
|
||||
@@ -369,13 +391,13 @@ bool dump_context(context_entry_t* entry) {
|
||||
if (group.context != NULL) {
|
||||
status = rocprofiler_group_get_data(&group);
|
||||
check_status(status);
|
||||
if (verbose == 1) output_group(file_handle, &group, "group0-data");
|
||||
if (verbose == 1) output_group(entry, "group0-data");
|
||||
|
||||
status = rocprofiler_get_metrics(group.context);
|
||||
check_status(status);
|
||||
std::ostringstream oss;
|
||||
oss << index << "__" << filtr_kernel_name(entry->data.kernel_name);
|
||||
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
output_results(entry, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
free(const_cast<char*>(entry->data.kernel_name));
|
||||
|
||||
// Finishing cleanup
|
||||
@@ -545,6 +567,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
}
|
||||
|
||||
// Fill profiling context entry
|
||||
entry->agent = callback_data->agent;
|
||||
entry->group = *group;
|
||||
entry->features = tool_data->features;
|
||||
entry->feature_count = tool_data->feature_count;
|
||||
|
||||
@@ -328,6 +328,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
}
|
||||
|
||||
CHECK_STATUS("hsa_memory_allocate", status);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
@@ -345,14 +346,21 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
|
||||
|
||||
uint8_t* buffer = NULL;
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
CHECK_STATUS("hsa_memory_allocate", status);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Transfer data method
|
||||
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
|
||||
bool host_to_dev) {
|
||||
hsa_status_t status;
|
||||
status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
// Memcopy method.
// Copies `length` bytes from src_buff to dest_buff with hsa_memory_copy.
// The resulting status is handed to CHECK_STATUS (defined outside this
// excerpt) and the function returns true only for HSA_STATUS_SUCCESS.
// NOTE(review): the name suggests dest_buff is host memory, but nothing here
// enforces that - confirm with callers. `length` is 32-bit, so a wider size
// passed by a caller is narrowed before the copy.
|
||||
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
|
||||
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
CHECK_STATUS("hsa_memory_copy", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Free method.
// Releases `ptr` with hsa_memory_free. The status is handed to CHECK_STATUS
// (defined outside this excerpt); returns true only for HSA_STATUS_SUCCESS.
// The pointer is forwarded unchecked, so filtering out null is left to the
// caller.
|
||||
bool HsaRsrcFactory::MemoryFree(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
@@ -213,8 +213,11 @@ class HsaRsrcFactory {
|
||||
//
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Transfer data method
|
||||
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
|
||||
// Memcopy method
|
||||
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
|
||||
|
||||
// Free method
|
||||
static bool MemoryFree(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle