adding context pool

Change-Id: Id8c6ee43ac6c86480848a5d3e759c42730cf388a


[ROCm/rocprofiler commit: 632309a626]
이 커밋은 다음에 포함됨:
Evgeny
2019-03-03 01:04:00 -06:00
부모 8feba010df
커밋 878e8e20b1
7개의 변경된 파일514개의 추가작업 그리고 50개의 파일을 삭제
+5 -3
파일 보기
@@ -14,13 +14,13 @@ else
test_app=$*
# paths to ROC profiler and other libraries
export LD_LIBRARY_PATH=$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH
export PATH=.:$PATH
# ROC profiler library loaded by HSA runtime
export HSA_TOOLS_LIB=librocprofiler64.so.1
# tool library loaded by ROC profiler
if [ -z $ROCP_TOOL_LIB ] ; then
if [ -z "$ROCP_TOOL_LIB" ] ; then
export ROCP_TOOL_LIB=libintercept_test.so
fi
# enable error messages
@@ -30,7 +30,9 @@ export ROCPROFILER_LOG=1
# ROC profiler metrics config file
unset ROCP_PROXY_QUEUE
# ROC profiler metrics config file
export ROCP_METRICS=$BIN_DIR/lib/metrics.xml
if [ -z "$ROCP_METRICS" ] ; then
export ROCP_METRICS=$BIN_DIR/lib/metrics.xml
fi
LD_PRELOAD=$ROCP_TOOL_LIB $test_app
fi
+44
파일 보기
@@ -383,6 +383,50 @@ hsa_status_t rocprofiler_queue_create_profiled(
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
hsa_queue_t** queue);
////////////////////////////////////////////////////////////////////////////////
// Profiling pool
//
// Support for profiling contexts pool
// Profiling pool handle, opaque to the API user.
// Returned by rocprofiler_pool_open() and passed to the other pool calls.
typedef void rocprofiler_pool_t;
// Profiling pool entry: a pooled profiling context together with its
// payload buffer. Filled in by rocprofiler_pool_fetch() and passed to the
// pool handler.
typedef struct {
rocprofiler_t* context; // profiling context object of the entry
void* payload; // payload data buffer of the entry
} rocprofiler_pool_entry_t;
// Profiling pool handler, called on profiling completion of a pool entry.
// 'entry' is the completed pool entry, 'arg' is the 'handler_arg' value
// supplied in the pool properties.
typedef bool (*rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t* entry, void* arg);
// Profiling pool properties, passed to rocprofiler_pool_open()
typedef struct {
uint32_t num_entries; // number of entries in the pool
uint32_t payload_bytes; // size of the payload buffer of each entry, in bytes
rocprofiler_pool_handler_t handler; // handler called on context completion
void* handler_arg; // argument passed to the handler
} rocprofiler_pool_properties_t;
// Open profiling pool
// Creates a pool of profiling contexts for the given agent and features and
// returns its handle through 'pool'.
hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, // GPU handle
rocprofiler_feature_t* features, // [in] profiling features array
uint32_t feature_count, // profiling features count
rocprofiler_pool_t** pool, // [out] profiling pool handle
uint32_t mode, // profiling mode mask
rocprofiler_pool_properties_t*); // [in] pool properties
// Close profiling pool
// Destroys the pool referenced by the handle.
hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool); // profiling pool handle
// Fetch profiling pool entry
// Returns the context and the payload buffer of a pool entry through 'entry'.
hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t* pool, // profiling pool handle
rocprofiler_pool_entry_t* entry); // [out] empty profiling pool entry
// Flush profiling pool
// Processes the pool entries which profiling is already completed.
hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool); // profiling pool handle
////////////////////////////////////////////////////////////////////////////////
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
+15 -5
파일 보기
@@ -153,11 +153,20 @@ class Context {
public:
typedef std::map<std::string, rocprofiler_feature_t*> info_map_t;
static void Create(Context** context, const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
static void Create(Context* obj, const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg)
{
*context = NULL;
new (obj) Context(agent_info, queue, info, info_count, handler, handler_arg);
obj->Construct(agent_info, queue, info, info_count, handler, handler_arg);
}
static void Release(Context* obj) { obj->Destruct(); }
static Context* Create(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg)
{
Context* obj = new Context(agent_info, queue, info, info_count, handler, handler_arg);
if (obj == NULL) EXC_RAISING(HSA_STATUS_ERROR, "allocation error");
try {
obj->Construct(agent_info, queue, info, info_count, handler, handler_arg);
} catch(...) {
@@ -165,7 +174,7 @@ class Context {
obj = NULL;
throw;
}
*context = obj;
return obj;
}
static void Destroy(Context* obj) { if (obj != NULL) delete obj; }
@@ -300,7 +309,9 @@ class Context {
handler_arg_(handler_arg)
{}
~Context() {
~Context() { Destruct(); }
void Destruct() {
for (const auto& v : info_map_) {
const std::string& name = v.first;
const rocprofiler_feature_t* info = v.second;
@@ -311,7 +322,6 @@ class Context {
}
}
void Construct(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg)
{
+193
파일 보기
@@ -0,0 +1,193 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef SRC_CORE_CONTEXT_POOL_H_
#define SRC_CORE_CONTEXT_POOL_H_
#include "inc/rocprofiler.h"
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <mutex>
#include <thread>
#include "core/context.h"
namespace rocprofiler {
// Pool of reusable profiling contexts.
//
// The pool is a ring of 'num_entries' fixed-size slots stored in one 64-byte
// aligned array. Every slot starts with an entry_t header and is followed, at
// offset payload_off_, by the user payload area of 'payload_bytes' bytes
// (rounded up to 64). The read and write indexes are monotonically growing
// byte offsets; a slot address is obtained as 'index % array_size_bytes_'.
//
// The array and the contexts are created lazily on the first Fetch().
// Fetch() reserves the next slot and waits while the ring is full;
// check_completed() hands completed slots, in order, to the pool handler and
// releases them for reuse.
class ContextPool {
 public:
  typedef uint64_t index_t;
  typedef std::mutex mutex_t;

  // Header stored at the beginning of every pool slot
  struct entry_t {
    ContextPool* pool;            // owning pool
    Context* context;             // profiling context of the slot
    std::atomic<bool> completed;  // set by the context completion handler
  };

  // Creates a pool object; the slots themselves are created on first Fetch().
  // Raises HSA_STATUS_ERROR for an empty pool, which could never serve a Fetch().
  static ContextPool* Create(
    uint32_t num_entries,
    uint32_t payload_bytes,
    const util::AgentInfo* agent_info,
    rocprofiler_feature_t* info,
    const uint32_t info_count,
    rocprofiler_pool_handler_t handler,
    void* handler_arg)
  {
    if (num_entries == 0) EXC_RAISING(HSA_STATUS_ERROR, "pool entries number is zero");
    ContextPool* obj = new ContextPool(num_entries, payload_bytes, agent_info, info, info_count, handler, handler_arg);
    if (obj == NULL) EXC_RAISING(HSA_STATUS_ERROR, "allocation error");
    return obj;
  }

  // Destroys the pool with all its contexts; NULL is accepted
  static void Destroy(ContextPool* pool) { delete pool; }

  // Reserves the next pool slot and returns its context and payload in
  // 'pool_entry'. Waits, processing completed entries, while the ring is full.
  // Raises HSA_STATUS_ERROR if the reserved slot is still marked completed.
  void Fetch(rocprofiler_pool_entry_t* pool_entry) {
    // Lazy construction; the acquire load pairs with the release store in Construct()
    if (constructed_.load(std::memory_order_acquire) == false) {
      Construct(agent_info_, info_, info_count_);
    }

    const index_t write_index = write_index_.fetch_add(entry_size_bytes_, std::memory_order_relaxed);
    // The slot is free once the read index is less than one ring size behind
    while (write_index >= (read_index_.load(std::memory_order_acquire) + array_size_bytes_)) {
      check_completed();
      std::this_thread::yield();
    }

    entry_t* entry = GetPoolEntry(write_index, pool_entry);
    if (entry->completed.load(std::memory_order_relaxed) != false) EXC_RAISING(HSA_STATUS_ERROR, "Corrupted pool entry");
  }

  // Processes the entries which are already completed
  void Flush() {
    check_completed();
  }

 private:
  // Rounds the size up to a multiple of 64
  static unsigned aligned64(const unsigned& size) { return (size + 0x3f) & ~0x3fu; }

  // Context completion handler registered for every pooled context;
  // 'arg' is the slot address, which is also its entry_t header
  static bool context_handler(rocprofiler_group_t group, void* arg) {
    entry_t* entry = reinterpret_cast<entry_t*>(arg);
    entry->completed.store(true, std::memory_order_release);
    entry->pool->check_completed();
    return true;
  }

  ContextPool(
    uint32_t num_entries,
    uint32_t payload_bytes,
    const util::AgentInfo* agent_info,
    rocprofiler_feature_t* info,
    const uint32_t info_count,
    rocprofiler_pool_handler_t pool_handler,
    void* pool_handler_arg
  ) :
    payload_off_(aligned64(sizeof(entry_t))),
    entry_size_bytes_(payload_off_ + aligned64(payload_bytes)),
    array_size_bytes_(entry_size_bytes_ * num_entries),
    array_data_(NULL),
    array_(NULL),
    read_index_(0),
    write_index_(0),
    sync_flag_(false),
    agent_info_(agent_info),
    info_(info),
    info_count_(info_count),
    pool_handler_(pool_handler),
    pool_handler_arg_(pool_handler_arg),
    constructed_(false)
  {}

  // Allocates the slot array and creates one context per slot.
  // Serialized by mutex_, so only the first caller does the work. On a failure
  // everything created so far is released and the exception is propagated.
  void Construct(const util::AgentInfo* agent_info, rocprofiler_feature_t* info, const uint32_t info_count) {
    std::lock_guard<mutex_t> lck(mutex_);
    if (constructed_.load(std::memory_order_relaxed)) return;

    // Over-allocate by 63 bytes to be able to align the array to 64 bytes
    array_data_ = static_cast<char*>(malloc(array_size_bytes_ + 0x3f));
    if (array_data_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "Pool array allocation error");
    const uintptr_t aligned_addr = (reinterpret_cast<uintptr_t>(array_data_) + 0x3f) & ~static_cast<uintptr_t>(0x3f);
    array_ = reinterpret_cast<char*>(aligned_addr);
    // Zeroed headers: NULL contexts and cleared 'completed' flags
    memset(array_, 0, array_size_bytes_);

    try {
      const char* end = array_ + array_size_bytes_;
      for (char* ptr = array_; ptr < end; ptr += entry_size_bytes_) {
        entry_t* entry = reinterpret_cast<entry_t*>(ptr);
        entry->pool = this;
        entry->context = Context::Create(agent_info, NULL, info, info_count, ContextPool::context_handler, ptr);
      }
    } catch (...) {
      // Slots not reached yet hold NULL contexts, which Context::Destroy accepts
      ReleaseArray();
      throw;
    }

    constructed_.store(true, std::memory_order_release);
  }

  ~ContextPool() { ReleaseArray(); }

  // Destroys the contexts of all slots and frees the slot array.
  // Safe to call when the array was never allocated.
  void ReleaseArray() {
    if (array_ != NULL) {
      const char* end = array_ + array_size_bytes_;
      for (char* ptr = array_; ptr < end; ptr += entry_size_bytes_) {
        entry_t* entry = reinterpret_cast<entry_t*>(ptr);
        Context::Destroy(entry->context);
      }
    }
    // The block must be freed by the pointer malloc returned, not the aligned one
    free(array_data_);
    array_data_ = NULL;
    array_ = NULL;
  }

  // Maps a ring index (byte offset) to the slot address
  char* GetArrayPtr(const index_t& index) { return array_ + (index % array_size_bytes_); }

  // Fills 'pool_entry' with the context and the payload of the slot at 'index'
  // and returns the slot header
  entry_t* GetPoolEntry(const index_t& index, rocprofiler_pool_entry_t* pool_entry) {
    char* ptr = GetArrayPtr(index);
    entry_t* entry = reinterpret_cast<entry_t*>(ptr);
    void* payload = ptr + payload_off_;
    *pool_entry = rocprofiler_pool_entry_t{};
    pool_entry->context = reinterpret_cast<rocprofiler_t*>(entry->context);
    pool_entry->payload = payload;
    return entry;
  }

  // Passes completed entries to the pool handler in ring order, starting from
  // the read index and stopping at the first entry not completed yet.
  // sync_flag_ lets only one caller at a time do the processing; concurrent
  // callers return immediately.
  void check_completed() {
    if (sync_flag_.test_and_set(std::memory_order_acquire) == false) {
      index_t read_index = read_index_.load(std::memory_order_relaxed);
      const index_t write_index = write_index_.load(std::memory_order_relaxed);
      while (read_index < write_index) {
        rocprofiler_pool_entry_t pool_entry{};
        entry_t* entry = GetPoolEntry(read_index, &pool_entry);
        if (entry->completed.load(std::memory_order_acquire) == false) break;

        pool_handler_(&pool_entry, pool_handler_arg_);
        // Clear the flag before publishing the slot as free for Fetch()
        entry->completed.store(false, std::memory_order_relaxed);
        read_index += entry_size_bytes_;
        read_index_.store(read_index, std::memory_order_release);
      }
      sync_flag_.clear(std::memory_order_release);
    }
  }

  const uint32_t payload_off_;       // payload offset inside a slot
  const uint32_t entry_size_bytes_;  // slot size: header plus payload
  const uint32_t array_size_bytes_;  // size of the whole slot array
  char* array_data_;                 // pointer returned by malloc
  char* array_;                      // 64-byte aligned start of the slots
  std::atomic<index_t> read_index_;
  std::atomic<index_t> write_index_;
  std::atomic_flag sync_flag_;
  const util::AgentInfo* agent_info_;
  rocprofiler_feature_t* info_;
  const uint32_t info_count_;
  rocprofiler_pool_handler_t pool_handler_;
  void* pool_handler_arg_;
  std::atomic<bool> constructed_;
  mutex_t mutex_;
};
} // namespace rocprofiler
#endif // SRC_CORE_CONTEXT_POOL_H_
+61 -2
파일 보기
@@ -28,6 +28,7 @@ THE SOFTWARE.
#include <vector>
#include "core/context.h"
#include "core/context_pool.h"
#include "core/hsa_queue.h"
#include "core/intercept_queue.h"
#include "core/proxy_queue.h"
@@ -477,8 +478,8 @@ PUBLIC_API hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_
}
rocprofiler::Context** context_ret = reinterpret_cast<rocprofiler::Context**>(handle);
rocprofiler::Context::Create(context_ret, agent_info, queue, features, feature_count, properties->handler,
properties->handler_arg);
*context_ret = rocprofiler::Context::Create(agent_info, queue, features, feature_count,
properties->handler, properties->handler_arg);
API_METHOD_SUFFIX
}
@@ -608,6 +609,64 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(
API_METHOD_SUFFIX
}
////////////////////////////////////////////////////////////////////////////////
// Open profiling pool
// Looks up the agent info for 'agent', creates a context pool configured by
// 'properties' and returns its handle through 'pool'. The 'mode' argument is
// not used by this function. Errors raised inside are reported through the
// API_METHOD_PREFIX/API_METHOD_SUFFIX wrapping.
PUBLIC_API hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, // GPU handle
                                              rocprofiler_feature_t* features, // [in] profiling features array
                                              uint32_t feature_count, // profiling info count
                                              rocprofiler_pool_t** pool, // [out] profiling pool handle
                                              uint32_t mode, // profiling mode mask
                                              rocprofiler_pool_properties_t* properties) // pool properties
{
  API_METHOD_PREFIX
  // Both pointers are dereferenced below, reject NULL instead of crashing
  if (pool == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool handle pointer is NULL");
  }
  if (properties == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool properties pointer is NULL");
  }

  rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
  const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
  if (agent_info == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "agent is not found");
  }

  rocprofiler::ContextPool* obj = rocprofiler::ContextPool::Create(
    properties->num_entries,
    properties->payload_bytes,
    agent_info,
    features,
    feature_count,
    properties->handler,
    properties->handler_arg
  );
  *pool = reinterpret_cast<rocprofiler_pool_t*>(obj);
  API_METHOD_SUFFIX
}
// Close profiling pool
// Converts the opaque handle back to the pool object and destroys it.
PUBLIC_API hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool) // profiling pool handle
{
  API_METHOD_PREFIX
  rocprofiler::ContextPool::Destroy(reinterpret_cast<rocprofiler::ContextPool*>(pool));
  API_METHOD_SUFFIX
}
// Fetch profiling pool entry
// Fetches the next entry of the pool and returns its context and payload
// through 'entry'.
PUBLIC_API hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t* pool, // profiling pool handle
                                               rocprofiler_pool_entry_t* entry) // [out] empty profiling pool entry
{
  API_METHOD_PREFIX
  // Both pointers are dereferenced by the fetch, reject NULL instead of crashing
  if (pool == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool handle is NULL");
  }
  if (entry == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool entry pointer is NULL");
  }
  rocprofiler::ContextPool* context_pool = reinterpret_cast<rocprofiler::ContextPool*>(pool);
  context_pool->Fetch(entry);
  API_METHOD_SUFFIX
}
// Flush profiling pool
// Makes the pool process its already completed entries.
PUBLIC_API hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool) // profiling pool handle
{
  API_METHOD_PREFIX
  // The handle is dereferenced below, reject NULL instead of crashing
  if (pool == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool handle is NULL");
  }
  rocprofiler::ContextPool* context_pool = reinterpret_cast<rocprofiler::ContextPool*>(pool);
  context_pool->Flush();
  API_METHOD_SUFFIX
}
////////////////////////////////////////////////////////////////////////////////
// Return the info for a given info kind
PUBLIC_API hsa_status_t rocprofiler_get_info(
const hsa_agent_t *agent,
+163 -35
파일 보기
@@ -25,9 +25,10 @@ THE SOFTWARE.
#include <unistd.h>
#include <dlfcn.h>
#include <iostream>
#include <vector>
#include <atomic>
#include <iostream>
#include <sstream>
#include <vector>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
@@ -36,6 +37,7 @@ THE SOFTWARE.
#include "dummy_kernel/dummy_kernel.h"
#include "simple_convolution/simple_convolution.h"
#include "util/test_assert.h"
#include "util/xml.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
@@ -45,6 +47,9 @@ THE SOFTWARE.
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
// Tool is unloaded
volatile bool is_loaded = false;
// Profiling features
//rocprofiler_feature_t* features = NULL;
//unsigned feature_count = 0;
// Error handler
void fatal(const std::string msg) {
@@ -72,8 +77,19 @@ struct context_entry_t {
rocprofiler_callback_data_t data;
};
// Dispatch callback argument, registered with the queue callbacks
struct callbacks_arg_t {
// Array of profiling pool handles; the dispatch callback indexes it
// with the device index of the dispatching agent
rocprofiler_pool_t** pools;
};
// Pool handler argument: the profiling features to dump for a completed entry
struct handler_arg_t {
// Profiling features array
rocprofiler_feature_t* features;
// Number of elements in 'features'
unsigned feature_count;
};
// Dump stored context entry
void dump_context_entry(context_entry_t* entry) {
void dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features, unsigned feature_count) {
volatile std::atomic<bool>* valid = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
while (valid->load() == false) sched_yield();
@@ -97,26 +113,44 @@ void dump_context_entry(context_entry_t* entry) {
rocprofiler_group_t& group = entry->group;
if (group.context == NULL) {
fprintf(stderr, "tool error: context is NULL\n");
abort();
fatal("context is NULL\n");
}
if (feature_count > 0) {
hsa_status_t status = rocprofiler_group_get_data(&group);
check_status(status);
status = rocprofiler_get_metrics(group.context);
check_status(status);
}
rocprofiler_close(group.context);
for (unsigned i = 0; i < feature_count; ++i) {
const rocprofiler_feature_t* p = &features[i];
fprintf(stdout, "> %s ", p->name);
switch (p->data.kind) {
// Output metrics results
case ROCPROFILER_DATA_KIND_INT64:
fprintf(stdout, "= (%lu)\n", p->data.result_int64);
break;
default:
fprintf(stderr, "Undefined data kind(%u)\n", p->data.kind);
abort();
}
}
}
// Profiling completion handler
// Dump and delete the context entry
// Return true if the context was dumped successfully
bool context_handler(rocprofiler_group_t group, void* arg) {
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
bool context_handler(const rocprofiler_pool_entry_t* entry, void* arg) {
// Context entry
context_entry_t* ctx_entry = reinterpret_cast<context_entry_t*>(entry->payload);
handler_arg_t* handler_arg = reinterpret_cast<handler_arg_t*>(arg);
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
abort();
}
dump_context_entry(entry);
delete entry;
dump_context_entry(ctx_entry, handler_arg->features, handler_arg->feature_count);
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
@@ -125,35 +159,65 @@ bool context_handler(rocprofiler_group_t group, void* arg) {
return false;
}
#if 0
// Profiling completion handler for a standalone (non-pooled) context.
// Disabled code: it is compiled out by the enclosing '#if 0'.
// 'arg' is the context entry; the entry is dumped under the global mutex.
// The entry is not deleted here and the handler always returns false.
// NOTE(review): 'features' and 'feature_count' refer to the file-scope
// variables that are currently commented out - restore them before enabling.
bool context_handler1(rocprofiler_group_t group, void* arg) {
context_entry_t* ctx_entry = reinterpret_cast<context_entry_t*>(arg);
// Serialize the dump with the other handlers
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
abort();
}
dump_context_entry(ctx_entry, features, feature_count);
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
abort();
}
return false;
}
#endif
// Kernel dispatch callback
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/,
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* arg,
rocprofiler_group_t* group) {
// Passed tool data
hsa_agent_t agent = callback_data->agent;
// HSA status
hsa_status_t status = HSA_STATUS_ERROR;
// Profiling context
rocprofiler_t* context = NULL;
// Context entry
context_entry_t* entry = new context_entry_t();
// context properties
rocprofiler_properties_t properties{};
properties.handler = context_handler;
properties.handler_arg = (void*)entry;
#if 1
// Open profiling context
status = rocprofiler_open(callback_data->agent, NULL, 0,
const unsigned gpu_id = HsaRsrcFactory::Instance().GetAgentInfo(agent)->dev_index;
callbacks_arg_t* callbacks_arg = reinterpret_cast<callbacks_arg_t*>(arg);
rocprofiler_pool_t* pool = callbacks_arg->pools[gpu_id];
rocprofiler_pool_entry_t pool_entry{};
status = rocprofiler_pool_fetch(pool, &pool_entry);
check_status(status);
// Profiling context entry
rocprofiler_t* context = pool_entry.context;
context_entry_t* entry = reinterpret_cast<context_entry_t*>(pool_entry.payload);
#else
// Open profiling context
// context properties
context_entry_t* entry = new context_entry_t{};
rocprofiler_t* context = NULL;
rocprofiler_properties_t properties{};
properties.handler = context_handler1;
properties.handler_arg = (void*)entry;
status = rocprofiler_open(agent, features, feature_count,
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
check_status(status);
#endif
// Get group[0]
status = rocprofiler_get_group(context, 0, group);
check_status(status);
// Fill profiling context entry
entry->agent = callback_data->agent;
entry->agent = agent;
entry->group = *group;
entry->data = *callback_data;
entry->data.kernel_name = strdup(callback_data->kernel_name);
@@ -162,26 +226,90 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
return HSA_STATUS_SUCCESS;
}
// Builds the fixed set of profiling features used by the test tool.
// Allocates a zero-filled features array with new[], marks every element as
// a metric with its name, stores the array to '*ret' and returns the number
// of elements. The array is owned by the caller.
unsigned metrics_input(rocprofiler_feature_t** ret) {
  // PMC events / metrics, in the order they are placed to the array
  static const char* const metric_names[] = {
    "GRBM_COUNT",
    "GRBM_GUI_ACTIVE",
    "GPUBusy",
    "SQ_WAVES",
    "SQ_INSTS_VALU",
    "VALUInsts",
    "TCC_HIT_sum",
    "TCC_MISS_sum",
    "WRITE_SIZE",
  };
  const unsigned metric_count = sizeof(metric_names) / sizeof(metric_names[0]);

  // Profiling feature objects
  rocprofiler_feature_t* array = new rocprofiler_feature_t[metric_count];
  memset(array, 0, metric_count * sizeof(rocprofiler_feature_t));
  for (unsigned i = 0; i < metric_count; ++i) {
    array[i].kind = ROCPROFILER_FEATURE_KIND_METRIC;
    array[i].name = metric_names[i];
  }

  *ret = array;
  return metric_count;
}
void initialize() {
// Getting GPU device info
const AgentInfo* agent_info = NULL;
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) {
fprintf(stderr, "GetGpuAgentInfo failed\n");
abort();
}
// Available GPU agents
const unsigned gpu_count = HsaRsrcFactory::Instance().GetCountOfGpuAgents();
// Getting profiling features
rocprofiler_feature_t* features = NULL;
unsigned feature_count = metrics_input(&features);
// Handler arg
handler_arg_t* handler_arg = new handler_arg_t{};
handler_arg->features = features;
handler_arg->feature_count = feature_count;
// Context properties
rocprofiler_pool_properties_t properties{};
properties.num_entries = 100;
properties.payload_bytes = sizeof(context_entry_t);
properties.handler = context_handler;
properties.handler_arg = handler_arg;
// Adding dispatch observer
callbacks_arg_t* callbacks_arg = new callbacks_arg_t{};
callbacks_arg->pools = new rocprofiler_pool_t* [gpu_count];
for (unsigned gpu_id = 0; gpu_id < gpu_count; gpu_id++) {
// Getting GPU device info
const AgentInfo* agent_info = NULL;
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(gpu_id, &agent_info) == false) {
fprintf(stderr, "GetGpuAgentInfo failed\n");
abort();
}
// Open profiling pool
rocprofiler_pool_t* pool = NULL;
hsa_status_t status = rocprofiler_pool_open(agent_info->dev_id, features, feature_count,
&pool, 0/*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
check_status(status);
callbacks_arg->pools[gpu_id] = pool;
}
rocprofiler_queue_callbacks_t callbacks_ptrs{};
callbacks_ptrs.dispatch = dispatch_callback;
rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL);
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg);
}
// Tool cleanup: unregisters the dispatch callback and flushes stdout
void cleanup() {
// Unregister dispatch callback
rocprofiler_remove_queue_callbacks();
// Dump stored profiling output data
fflush(stdout);
// Close profiling pool
// NOTE(review): disabled code; 'pool' is not declared in this function,
// so the profiling pools are neither flushed nor closed here
#if 0
hsa_status_t status = rocprofiler_pool_flush(pool);
check_status(status);
status = rocprofiler_pool_close(pool);
check_status(status);
#endif
}
// Tool constructor
+33 -5
파일 보기
@@ -22,6 +22,21 @@
# THE SOFTWARE.
################################################################################
# Test check routine
# Usage: eval_test <label> <command line>
# Prints the label with the command line, evaluates the command line and
# reports PASSED/FAILED; test_status counts the failed tests
test_status=0
eval_test() {
  label=$1
  cmdline=$2
  echo "$label: \"$cmdline\""
  eval "$cmdline"
  case $? in
    0)
      echo "$label: PASSED"
      ;;
    *)
      echo "$label: FAILED"
      test_status=$((test_status + 1))
      ;;
  esac
}
# enable tools load failure reporting
export HSA_TOOLS_REPORT_LOAD_FAILURE=1
# paths to ROC profiler and other libraries
@@ -37,12 +52,22 @@ export ROCP_METRICS=metrics.xml
# test trace
export ROC_TEST_TRACE=1
## Intercepting usage model test
# tool library loaded by ROC profiler
export ROCP_TOOL_LIB=./test/libintercept_test.so
../bin/run_tool.sh ./test/ctrl
export ROCP_KITER=50
export ROCP_DITER=50
export ROCP_AGENTS=1
export ROCP_THRS=1
eval_test "Intercepting usage model test" "../bin/run_tool.sh ./test/ctrl"
## Standalone sampling usage model test
unset ROCP_TOOL_LIB
eval ./test/standalone_test
eval_test "Standalone sampling usage model test" ./test/standalone_test
## Libtool test
# tool library loaded by ROC profiler
export ROCP_TOOL_LIB=libtool.so
@@ -61,7 +86,9 @@ export ROCP_DITER=50
export ROCP_AGENTS=1
export ROCP_THRS=1
export ROCP_INPUT=input.xml
eval ./test/ctrl
eval_test "'rocprof' libtool test" ./test/ctrl
## Libtool test, counter sets
# Memcopies tracking
export ROCP_MCOPY_TRACKING=1
@@ -69,10 +96,11 @@ export ROCP_MCOPY_TRACKING=1
export ROCP_KITER=1
export ROCP_DITER=4
export ROCP_INPUT=input1.xml
eval ./test/ctrl
eval_test "libtool test, counter sets" ./test/ctrl
#valgrind --leak-check=full $tbin
#valgrind --tool=massif $tbin
#ms_print massif.out.<N>
exit 0
if [ $test_status != 0 ] ; then echo "$test_status tests failed"; fi
exit $test_status