fixing SQTT copy mode result buffer allocation

[ROCm/rocprofiler commit: 58e879343f]
This commit is contained in:
Evgeny
2018-01-23 18:57:45 -06:00
parent f92f655139
commit 14e45e9cb4
4 file modificati con 18 aggiunte e 18 eliminazioni
+14 -9
Vedi File
@@ -185,12 +185,10 @@ class Context {
// Initialize rocprofiler context
void Initialize(rocprofiler_feature_t* info_array, const uint32_t info_count) {
// Set input features filter, to not duplicate referenced features
// Set input features data as uninitialized
info_map_t input_map;
// Register input features to not duplicate by features referencing
for (unsigned i = 0; i < info_count; ++i) {
rocprofiler_feature_t* info = &info_array[i];
input_map[info->name] = info;
info_map_[info->name] = info;
}
// Adding zero group, always present
@@ -199,11 +197,10 @@ class Context {
// Processing input features
for (unsigned i = 0; i < info_count; ++i) {
rocprofiler_feature_t* info = &info_array[i];
info_map_[info->name] = info;
const rocprofiler_feature_kind_t kind = info->kind;
const char* name = info->name;
if (kind == ROCPROFILER_FEATURE_KIND_METRIC) { // Processing metrics features
if (kind != ROCPROFILER_FEATURE_KIND_TRACE) { // Processing metrics features
const Metric* metric = metrics_->Get(name);
if (metric == NULL)
EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name << "' is not found");
@@ -220,9 +217,7 @@ class Context {
// For metric expressions, check that the same counter is not among the input metrics
// and that the counter wasn't already registered by another input metric expression
if (metric->GetExpr()) {
auto inp_it = input_map.find(counter->name);
auto inf_it = info_map_.find(counter->name);
if ((inp_it != input_map.end()) || (inf_it != info_map_.end())) {
if (info_map_.find(counter->name) != info_map_.end()) {
continue;
} else {
info = NewCounterInfo(counter);
@@ -402,6 +397,16 @@ class Context {
rinfo->data.kind = ROCPROFILER_DATA_KIND_INT64;
} else if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
if (rinfo->data.result_bytes.copy) {
if (sample_id == 0) {
if (rinfo->data.result_bytes.size == 0) {
const uint32_t output_buffer_size = SqttProfile::output_buffer_size;
const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t);
rinfo->data.result_bytes.ptr = calloc(output_buffer_size64, sizeof(uint64_t));
rinfo->data.result_bytes.size = output_buffer_size;
} else if (rinfo->data.result_bytes.size != SqttProfile::output_buffer_size) {
EXC_RAISING(HSA_STATUS_ERROR, "result bytes copy mode, data array size mismatch(" << rinfo->data.result_bytes.size << ")");
}
}
char* result_bytes_ptr = reinterpret_cast<char*>(rinfo->data.result_bytes.ptr);
const char* end = result_bytes_ptr + rinfo->data.result_bytes.size;
const char* src = reinterpret_cast<char*>(ainfo_data->sqtt_data.ptr);
@@ -197,13 +197,6 @@ class SqttProfile : public Profile {
for (unsigned j = 0; j < info.parameter_count; ++j) {
Config<parameter_t>(&profile_).Insert(info.parameters[j]);
}
info.rinfo->data.result_bytes.size = output_buffer_size;
if (info.rinfo->data.result_bytes.copy) {
const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t);
info.rinfo->data.result_bytes.ptr = calloc(output_buffer_size64, sizeof(uint64_t));
memset(info.rinfo->data.result_bytes.ptr, 0, output_buffer_size);
}
}
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
+2 -2
Vedi File
@@ -184,13 +184,13 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
ptr = chunk_data + off;
size += chunk_size;
}
free(p->data.result_bytes.ptr);
fprintf(file, "size(%lu)\n", size);
if (size > p->data.result_bytes.size) {
fprintf(stderr, "SQTT data size is out of the result buffer size\n");
exit(1);
}
free(p->data.result_bytes.ptr);
const_cast<rocprofiler_feature_t*>(p)->data.result_bytes.size = 0;
} else {
fprintf(file, "(\n");
trace_data_arg_t trace_data_arg{file, label};
+2
Vedi File
@@ -29,6 +29,8 @@ else
tbin=$test_bin_dflt
fi
echo "Run $tbin"
export ROCP_KITER=100
export ROCP_DITER=100
eval $tbin
exit 0