HSA memory alloc/copy/free refactoring

[ROCm/rocprofiler commit: d04f7095f4]
Bu işleme şunda yer alıyor:
Evgeny
2018-04-20 11:15:26 -05:00
ebeveyn 12fbe76eab
işleme 0a8e945d88
10 değiştirilmiş dosya ile 87 ekleme ve 36 silme
+2 -2
Dosyayı Görüntüle
@@ -446,8 +446,8 @@ class Context {
else EXC_RAISING(HSA_STATUS_ERROR, "SQTT data out of output buffer");
}
hsa_status_t status = hsa_memory_copy(dest, src, size);
if (status == HSA_STATUS_SUCCESS) {
const bool suc = util::HsaRsrcFactory::CopyToHost(dest, src, size);
if (suc) {
*header = size;
callback_data->ptr = dest + align_size(size, sizeof(uint32_t));
rinfo->data.result_bytes.instance_count = sample_id + 1;
+2 -2
Dosyayı Görüntüle
@@ -88,8 +88,8 @@ class Profile {
}
virtual ~Profile() {
info_vector_.clear();
if (profile_.command_buffer.ptr) hsa_memory_free(profile_.command_buffer.ptr);
if (profile_.output_buffer.ptr) hsa_memory_free(profile_.output_buffer.ptr);
if (profile_.command_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.command_buffer.ptr);
if (profile_.output_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.output_buffer.ptr);
if (profile_.events) free(const_cast<event_t*>(profile_.events));
if (profile_.parameters) free(const_cast<parameter_t*>(profile_.parameters));
if (completion_signal_.handle) {
+13 -5
Dosyayı Görüntüle
@@ -331,6 +331,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
@@ -348,14 +349,21 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
uint8_t* buffer = NULL;
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Transfer data method
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
bool host_to_dev) {
hsa_status_t status;
status = hsa_memory_copy(dest_buff, src_buff, length);
// Memcopy method
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
CHECK_STATUS("hsa_memory_copy", status);
return (status == HSA_STATUS_SUCCESS);
}
// Free method
bool HsaRsrcFactory::MemoryFree(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
return (status == HSA_STATUS_SUCCESS);
}
+5 -2
Dosyayı Görüntüle
@@ -215,8 +215,11 @@ class HsaRsrcFactory {
//
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Transfer data method
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
// Memcopy method
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
// Free method
static bool MemoryFree(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
+1 -1
Dosyayı Görüntüle
@@ -34,7 +34,7 @@ execute_process ( COMMAND sh -xc "mkdir -p ${PROJECT_BINARY_DIR}/RESULTS" )
## Build test library
set ( TEST_LIB "tool" )
set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp )
set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp ${UTIL_SRC} )
add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} )
target_include_directories ( ${TEST_LIB} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${TEST_LIB} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
+3 -4
Dosyayı Görüntüle
@@ -214,11 +214,10 @@ bool TestHsa::Run() {
total_time_taken_ += dispatch_time_taken_;
// Copy kernel buffers from local memory into system memory
hsa_rsrc_->TransferData(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize(),
false);
test_->PrintOutput();
const bool suc = hsa_rsrc_->CopyToHost(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize());
if (suc) test_->PrintOutput();
return true;
return suc;
}
bool TestHsa::VerifyResults() {
+8 -1
Dosyayı Görüntüle
@@ -31,6 +31,8 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <map>
#include "util/hsa_rsrc_factory.h"
// Class implements kernel test
class TestKernel {
public:
@@ -49,7 +51,12 @@ class TestKernel {
typedef mem_map_t::iterator mem_it_t;
typedef mem_map_t::const_iterator mem_const_it_t;
virtual ~TestKernel() {}
virtual ~TestKernel() {
for (auto& entry : mem_map_) {
void* ptr = entry.second.ptr;
if (ptr != NULL) HsaRsrcFactory::MemoryFree(ptr);
}
}
// Initialize method
virtual void Init() = 0;
+35 -12
Dosyayı Görüntüle
@@ -10,6 +10,7 @@
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <unistd.h>
@@ -22,6 +23,7 @@
#include <vector>
#include "inc/rocprofiler.h"
#include "util/hsa_rsrc_factory.h"
#include "util/xml.h"
#define PUBLIC_API __attribute__((visibility("default")))
@@ -45,6 +47,7 @@ struct callbacks_data_t {
struct context_entry_t {
uint32_t valid;
uint32_t index;
hsa_agent_t agent;
rocprofiler_group_t group;
rocprofiler_feature_t* features;
unsigned feature_count;
@@ -251,6 +254,7 @@ void dump_sqtt_trace(const char* label, const uint32_t chunk, const void* data,
struct trace_data_arg_t {
FILE* file;
const char* label;
hsa_agent_t agent;
};
// Trace data callback for getting trace data from GPU local mamory
@@ -259,8 +263,19 @@ hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_status_t status = HSA_STATUS_SUCCESS;
trace_data_arg_t* arg = reinterpret_cast<trace_data_arg_t*>(data);
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size);
dump_sqtt_trace(arg->label, info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size);
const uint32_t data_size = info_data->sqtt_data.size;
const void* data_ptr = info_data->sqtt_data.ptr;
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, data_size);
#if 1
dump_sqtt_trace(arg->label, info_data->sample_id, data_ptr, data_size);
#else
void* buffer = malloc(data_size);
memset(buffer, 0, data_size);
const bool suc = HsaRsrcFactory::Instance().CopyToHost(arg->agent, buffer, data_ptr, data_size);
if (suc) dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size);
else fatal("SQTT data memcopy to host failed");
free(buffer);
#endif
} else
status = HSA_STATUS_ERROR;
return status;
@@ -272,8 +287,12 @@ unsigned align_size(unsigned size, unsigned alignment) {
}
// Output profiling results for input features
void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count,
rocprofiler_t* context, const char* label) {
void output_results(const context_entry_t* entry, const char* label) {
FILE* file = entry->file_handle;
const rocprofiler_feature_t* features = entry->features;
const unsigned feature_count = entry->feature_count;
rocprofiler_t* context = entry->group.context;
for (unsigned i = 0; i < feature_count; ++i) {
const rocprofiler_feature_t* p = &features[i];
fprintf(file, " %s ", p->name);
@@ -298,13 +317,14 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
const uint32_t off = align_size(chunk_size, sizeof(uint32_t));
ptr = chunk_data + off;
if (chunk_data >= end) fatal("SQTT data ptr is out of the result buffer size");
size += chunk_size;
}
fprintf(file, "size(%lu)\n", size);
free(p->data.result_bytes.ptr);
const_cast<rocprofiler_feature_t*>(p)->data.result_bytes.size = 0;
} else {
fprintf(file, "(\n");
trace_data_arg_t trace_data_arg{file, label};
trace_data_arg_t trace_data_arg{file, label, entry->agent};
hsa_status_t status = rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast<void*>(&trace_data_arg));
check_status(status);
fprintf(file, " )\n");
@@ -319,10 +339,14 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
}
// Output group intermeadate profiling results, created internally for complex metrics
void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
void output_group(const context_entry_t* entry, const char* label) {
const rocprofiler_group_t* group = &(entry->group);
context_entry_t group_entry = *entry;
for (unsigned i = 0; i < group->feature_count; ++i) {
if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64) {
output_results(file, group->features[i], 1, group->context, str);
group_entry.features = group->features[i];
group_entry.feature_count = 1;
output_results(&group_entry, label);
}
}
}
@@ -341,11 +365,9 @@ bool dump_context(context_entry_t* entry) {
}
++context_collected;
const uint32_t index = entry->index;
FILE* file_handle = entry->file_handle;
const rocprofiler_feature_t* features = entry->features;
const unsigned feature_count = entry->feature_count;
const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name);
fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\")",
@@ -369,13 +391,13 @@ bool dump_context(context_entry_t* entry) {
if (group.context != NULL) {
status = rocprofiler_group_get_data(&group);
check_status(status);
if (verbose == 1) output_group(file_handle, &group, "group0-data");
if (verbose == 1) output_group(entry, "group0-data");
status = rocprofiler_get_metrics(group.context);
check_status(status);
std::ostringstream oss;
oss << index << "__" << filtr_kernel_name(entry->data.kernel_name);
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
output_results(entry, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
free(const_cast<char*>(entry->data.kernel_name));
// Finishing cleanup
@@ -545,6 +567,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
}
// Fill profiling context entry
entry->agent = callback_data->agent;
entry->group = *group;
entry->features = tool_data->features;
entry->feature_count = tool_data->feature_count;
+13 -5
Dosyayı Görüntüle
@@ -328,6 +328,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
@@ -345,14 +346,21 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
uint8_t* buffer = NULL;
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Transfer data method
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
bool host_to_dev) {
hsa_status_t status;
status = hsa_memory_copy(dest_buff, src_buff, length);
// Memcopy method
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
CHECK_STATUS("hsa_memory_copy", status);
return (status == HSA_STATUS_SUCCESS);
}
// Free method
bool HsaRsrcFactory::MemoryFree(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
return (status == HSA_STATUS_SUCCESS);
}
+5 -2
Dosyayı Görüntüle
@@ -213,8 +213,11 @@ class HsaRsrcFactory {
//
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Transfer data method
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
// Memcopy method
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
// Free method
static bool MemoryFree(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//