adding context pool
Change-Id: Id8c6ee43ac6c86480848a5d3e759c42730cf388a
[ROCm/rocprofiler commit: 632309a626]
이 커밋은 다음에 포함됨:
@@ -14,13 +14,13 @@ else
|
||||
test_app=$*
|
||||
|
||||
# paths to ROC profiler and other libraries
|
||||
export LD_LIBRARY_PATH=$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH
|
||||
export PATH=.:$PATH
|
||||
|
||||
# ROC profiler library loaded by HSA runtime
|
||||
export HSA_TOOLS_LIB=librocprofiler64.so.1
|
||||
# tool library loaded by ROC profiler
|
||||
if [ -z $ROCP_TOOL_LIB ] ; then
|
||||
if [ -z "$ROCP_TOOL_LIB" ] ; then
|
||||
export ROCP_TOOL_LIB=libintercept_test.so
|
||||
fi
|
||||
# enable error messages
|
||||
@@ -30,7 +30,9 @@ export ROCPROFILER_LOG=1
|
||||
# ROC profiler metrics config file
|
||||
unset ROCP_PROXY_QUEUE
|
||||
# ROC profiler metrics config file
|
||||
export ROCP_METRICS=$BIN_DIR/lib/metrics.xml
|
||||
if [ -z "$ROCP_METRICS" ] ; then
|
||||
export ROCP_METRICS=$BIN_DIR/lib/metrics.xml
|
||||
fi
|
||||
|
||||
LD_PRELOAD=$ROCP_TOOL_LIB $test_app
|
||||
fi
|
||||
|
||||
@@ -383,6 +383,50 @@ hsa_status_t rocprofiler_queue_create_profiled(
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Profiling pool
|
||||
//
|
||||
// Support for profiling contexts pool
|
||||
|
||||
// Profiling pool
|
||||
typedef void rocprofiler_pool_t;
|
||||
|
||||
// Profiling pool entry
|
||||
// One pool entry as handed out by rocprofiler_pool_fetch() and passed to the
// pool completion handler.
typedef struct {
|
||||
rocprofiler_t* context; // profiling context object owned by the pool entry
|
||||
void* payload; // per-entry payload storage, located right after the entry header
|
||||
} rocprofiler_pool_entry_t;
|
||||
|
||||
// Profiling handler, calling on profiling completion
|
||||
typedef bool (*rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t* entry, void* arg);
|
||||
|
||||
// Profiling pool properties
|
||||
// Parameters supplied to rocprofiler_pool_open() describing the pool layout
// and the completion callback.
typedef struct {
|
||||
uint32_t num_entries; // number of entries (contexts) in the pool
|
||||
uint32_t payload_bytes; // payload size per entry, in bytes (the pool rounds it up to 64)
|
||||
rocprofiler_pool_handler_t handler; // handler called for an entry once its context completed
|
||||
void* handler_arg; // opaque argument passed through to the handler
|
||||
} rocprofiler_pool_properties_t;
|
||||
|
||||
// Open profiling pool
|
||||
// Creates a pool of profiling contexts for the given agent and feature set.
// On success the pool handle is stored to '*pool'.
hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, // GPU handle
|
||||
rocprofiler_feature_t* features, // [in] profiling features array
|
||||
uint32_t feature_count, // profiling info count
|
||||
rocprofiler_pool_t** pool, // [out] profiling pool handle
|
||||
uint32_t mode, // profiling mode mask
|
||||
rocprofiler_pool_properties_t*); // [in] pool properties
|
||||
|
||||
// Close profiling pool
|
||||
hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool); // profiling pool handle
|
||||
|
||||
// Fetch profiling pool entry
|
||||
// Fills 'entry' with the context and payload pointers of the next pool entry.
hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t* pool, // profiling pool handle
|
||||
rocprofiler_pool_entry_t* entry); // [out] empty profiling pool entry
|
||||
|
||||
// Flush profiling pool
|
||||
hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool); // profiling pool handle
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -153,11 +153,20 @@ class Context {
|
||||
public:
|
||||
typedef std::map<std::string, rocprofiler_feature_t*> info_map_t;
|
||||
|
||||
static void Create(Context** context, const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
|
||||
static void Create(Context* obj, const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
|
||||
const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg)
|
||||
{
|
||||
*context = NULL;
|
||||
new (obj) Context(agent_info, queue, info, info_count, handler, handler_arg);
|
||||
obj->Construct(agent_info, queue, info, info_count, handler, handler_arg);
|
||||
}
|
||||
|
||||
static void Release(Context* obj) { obj->Destruct(); }
|
||||
|
||||
static Context* Create(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
|
||||
const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg)
|
||||
{
|
||||
Context* obj = new Context(agent_info, queue, info, info_count, handler, handler_arg);
|
||||
if (obj == NULL) EXC_RAISING(HSA_STATUS_ERROR, "allocation error");
|
||||
try {
|
||||
obj->Construct(agent_info, queue, info, info_count, handler, handler_arg);
|
||||
} catch(...) {
|
||||
@@ -165,7 +174,7 @@ class Context {
|
||||
obj = NULL;
|
||||
throw;
|
||||
}
|
||||
*context = obj;
|
||||
return obj;
|
||||
}
|
||||
|
||||
static void Destroy(Context* obj) { if (obj != NULL) delete obj; }
|
||||
@@ -300,7 +309,9 @@ class Context {
|
||||
handler_arg_(handler_arg)
|
||||
{}
|
||||
|
||||
~Context() {
|
||||
~Context() { Destruct(); }
|
||||
|
||||
void Destruct() {
|
||||
for (const auto& v : info_map_) {
|
||||
const std::string& name = v.first;
|
||||
const rocprofiler_feature_t* info = v.second;
|
||||
@@ -311,7 +322,6 @@ class Context {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Construct(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
|
||||
const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef SRC_CORE_CONTEXT_POOL_H_
|
||||
#define SRC_CORE_CONTEXT_POOL_H_
|
||||
|
||||
#include "inc/rocprofiler.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <atomic>
#include <mutex>
#include <new>
#include <thread>

#include "core/context.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
class ContextPool {
|
||||
public:
|
||||
typedef uint64_t index_t;
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
struct entry_t {
|
||||
ContextPool* pool;
|
||||
Context* context;
|
||||
std::atomic<bool> completed;
|
||||
};
|
||||
|
||||
static ContextPool* Create(
|
||||
uint32_t num_entries,
|
||||
uint32_t payload_bytes,
|
||||
const util::AgentInfo* agent_info,
|
||||
rocprofiler_feature_t* info,
|
||||
const uint32_t info_count,
|
||||
rocprofiler_pool_handler_t handler,
|
||||
void* handler_arg)
|
||||
{
|
||||
ContextPool* obj = new ContextPool(num_entries, payload_bytes, agent_info, info, info_count, handler, handler_arg);
|
||||
if (obj == NULL) EXC_RAISING(HSA_STATUS_ERROR, "allocation error");
|
||||
return obj;
|
||||
}
|
||||
|
||||
static void Destroy(ContextPool* pool) { delete pool; }
|
||||
|
||||
void Fetch(rocprofiler_pool_entry_t* pool_entry) {
|
||||
if (constructed_ == false) {
|
||||
Construct(agent_info_, info_, info_count_);
|
||||
}
|
||||
const index_t write_index = write_index_.fetch_add(entry_size_bytes_, std::memory_order_relaxed);
|
||||
while (write_index >= (read_index_.load(std::memory_order_acquire) + array_size_bytes_)) {
|
||||
check_completed();
|
||||
std::this_thread::yield();
|
||||
}
|
||||
entry_t* entry = GetPoolEntry(write_index, pool_entry);
|
||||
if (entry->completed.load(std::memory_order_relaxed) != false) EXC_RAISING(HSA_STATUS_ERROR, "Corrupted pool entry");
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
check_completed();
|
||||
}
|
||||
|
||||
private:
|
||||
static unsigned aligned64(const unsigned& size) { return (size + 0x3f) & ~0x3fu; }
|
||||
|
||||
static bool context_handler(rocprofiler_group_t group, void* arg) {
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
entry->completed.store(true, std::memory_order_release);
|
||||
entry->pool->check_completed();
|
||||
return true;
|
||||
}
|
||||
|
||||
ContextPool(
|
||||
uint32_t num_entries,
|
||||
uint32_t payload_bytes,
|
||||
const util::AgentInfo* agent_info,
|
||||
rocprofiler_feature_t* info,
|
||||
const uint32_t info_count,
|
||||
rocprofiler_pool_handler_t pool_handler,
|
||||
void* pool_handler_arg
|
||||
) :
|
||||
payload_off_(aligned64(sizeof(entry_t))),
|
||||
entry_size_bytes_(payload_off_ + aligned64(payload_bytes)),
|
||||
array_size_bytes_(entry_size_bytes_ * num_entries),
|
||||
array_(NULL),
|
||||
read_index_(0),
|
||||
write_index_(0),
|
||||
sync_flag_(false),
|
||||
|
||||
agent_info_(agent_info),
|
||||
info_(info),
|
||||
info_count_(info_count),
|
||||
pool_handler_(pool_handler),
|
||||
pool_handler_arg_(pool_handler_arg),
|
||||
constructed_(false)
|
||||
{}
|
||||
|
||||
void Construct(const util::AgentInfo* agent_info, rocprofiler_feature_t* info, const uint32_t info_count) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
if (constructed_ == false) {
|
||||
array_data_ = (char*) malloc(array_size_bytes_ + 0x3f);
|
||||
array_ = reinterpret_cast<char*>(((intptr_t)array_data_ + 0x3f) >> 6 << 6);
|
||||
if (((intptr_t)array_ & 0x3f) != 0) EXC_RAISING(HSA_STATUS_ERROR, "Pool array is not aligned");
|
||||
memset(array_, 0, array_size_bytes_);
|
||||
|
||||
const char* end = array_ + array_size_bytes_;
|
||||
for (char* ptr = array_; ptr < end; ptr += entry_size_bytes_) {
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(ptr);
|
||||
entry->pool = this;
|
||||
entry->context = Context::Create(agent_info, NULL, info, info_count, ContextPool::context_handler, ptr);
|
||||
}
|
||||
|
||||
constructed_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
~ContextPool() {
|
||||
const char* end = array_ + array_size_bytes_;
|
||||
for (char* ptr = array_; ptr < end; ptr += entry_size_bytes_) {
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(ptr);
|
||||
Context::Destroy(entry->context);
|
||||
}
|
||||
free(array_);
|
||||
}
|
||||
|
||||
char* GetArrayPtr(const uint32_t& index) { return array_ + (index % array_size_bytes_); }
|
||||
|
||||
entry_t* GetPoolEntry(const uint32_t& index, rocprofiler_pool_entry_t* pool_entry) {
|
||||
char* ptr = GetArrayPtr(index);
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(ptr);
|
||||
void* payload = ptr + payload_off_;
|
||||
*pool_entry = rocprofiler_pool_entry_t{};
|
||||
pool_entry->context = reinterpret_cast<rocprofiler_t*>(entry->context);
|
||||
pool_entry->payload = payload;
|
||||
return entry;
|
||||
}
|
||||
|
||||
void check_completed() {
|
||||
if (sync_flag_.test_and_set(std::memory_order_acquire) == false) {
|
||||
index_t read_index = read_index_.load(std::memory_order_relaxed);
|
||||
const index_t write_index = write_index_.load(std::memory_order_relaxed);
|
||||
while(read_index < write_index) {
|
||||
rocprofiler_pool_entry_t pool_entry{};
|
||||
entry_t* entry = GetPoolEntry(read_index, &pool_entry);
|
||||
if (entry->completed.load(std::memory_order_acquire) == true) {
|
||||
pool_handler_(&pool_entry, pool_handler_arg_);
|
||||
entry->completed.store(false, std::memory_order_relaxed);
|
||||
read_index += entry_size_bytes_;
|
||||
read_index_.store(read_index, std::memory_order_release);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
sync_flag_.clear(std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t payload_off_;
|
||||
const uint32_t entry_size_bytes_;
|
||||
const uint32_t array_size_bytes_;
|
||||
char* array_data_;
|
||||
char* array_;
|
||||
volatile std::atomic<index_t> read_index_;
|
||||
volatile std::atomic<index_t> write_index_;
|
||||
volatile std::atomic_flag sync_flag_;
|
||||
|
||||
const util::AgentInfo* agent_info_;
|
||||
rocprofiler_feature_t* info_;
|
||||
const uint32_t info_count_;
|
||||
rocprofiler_pool_handler_t pool_handler_;
|
||||
void* pool_handler_arg_;
|
||||
|
||||
bool constructed_;
|
||||
mutex_t mutex_;
|
||||
};
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_CONTEXT_POOL_H_
|
||||
@@ -28,6 +28,7 @@ THE SOFTWARE.
|
||||
#include <vector>
|
||||
|
||||
#include "core/context.h"
|
||||
#include "core/context_pool.h"
|
||||
#include "core/hsa_queue.h"
|
||||
#include "core/intercept_queue.h"
|
||||
#include "core/proxy_queue.h"
|
||||
@@ -477,8 +478,8 @@ PUBLIC_API hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_
|
||||
}
|
||||
|
||||
rocprofiler::Context** context_ret = reinterpret_cast<rocprofiler::Context**>(handle);
|
||||
rocprofiler::Context::Create(context_ret, agent_info, queue, features, feature_count, properties->handler,
|
||||
properties->handler_arg);
|
||||
*context_ret = rocprofiler::Context::Create(agent_info, queue, features, feature_count,
|
||||
properties->handler, properties->handler_arg);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -608,6 +609,64 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Open profiling pool
|
||||
// Creates a profiling context pool for 'agent' and returns its handle in
// '*pool'. Invalid arguments are reported the same way as an unknown agent,
// by raising HSA_STATUS_ERROR between the API method prefix and suffix.
// NOTE(review): 'mode' is not referenced by this implementation.
PUBLIC_API hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, // GPU handle
                                              rocprofiler_feature_t* features, // [in] profiling features array
                                              uint32_t feature_count, // profiling info count
                                              rocprofiler_pool_t** pool, // [out] profiling pool handle
                                              uint32_t mode, // profiling mode mask
                                              rocprofiler_pool_properties_t* properties) // pool properties
{
  API_METHOD_PREFIX
  if (pool == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool output pointer is NULL");
  }
  // Leave a well-defined value in the output if any step below fails.
  *pool = NULL;
  if (properties == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool properties pointer is NULL");
  }
  // The pool calls the handler unconditionally for every completed entry.
  if (properties->handler == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool handler is NULL");
  }

  rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
  const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
  if (agent_info == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "agent is not found");
  }

  rocprofiler::ContextPool* obj = rocprofiler::ContextPool::Create(
    properties->num_entries,
    properties->payload_bytes,
    agent_info,
    features,
    feature_count,
    properties->handler,
    properties->handler_arg
  );
  *pool = reinterpret_cast<rocprofiler_pool_t*>(obj);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Close profiling pool
|
||||
// Destroys the pool referenced by the opaque 'pool' handle.
PUBLIC_API hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool) // profiling pool handle
{
  API_METHOD_PREFIX
  // The opaque handle is the ContextPool object itself.
  rocprofiler::ContextPool::Destroy(reinterpret_cast<rocprofiler::ContextPool*>(pool));
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Fetch profiling pool entry
|
||||
// Fetches the next pool entry into '*entry'. NULL arguments are rejected by
// raising HSA_STATUS_ERROR instead of being dereferenced.
PUBLIC_API hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t* pool, // profiling pool handle
                                               rocprofiler_pool_entry_t* entry) // [out] empty profiling pool entry
{
  API_METHOD_PREFIX
  if (pool == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool handle is NULL");
  }
  if (entry == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool entry pointer is NULL");
  }
  rocprofiler::ContextPool* context_pool = reinterpret_cast<rocprofiler::ContextPool*>(pool);
  context_pool->Fetch(entry);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Flush profiling pool
|
||||
// Flushes the pool: entries whose contexts already completed are passed to
// the pool handler. A NULL handle is rejected by raising HSA_STATUS_ERROR
// instead of being dereferenced.
PUBLIC_API hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool) // profiling pool handle
{
  API_METHOD_PREFIX
  if (pool == NULL) {
    EXC_RAISING(HSA_STATUS_ERROR, "pool handle is NULL");
  }
  rocprofiler::ContextPool* context_pool = reinterpret_cast<rocprofiler::ContextPool*>(pool);
  context_pool->Flush();
  API_METHOD_SUFFIX
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Return the info for a given info kind
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_info(
|
||||
const hsa_agent_t *agent,
|
||||
|
||||
@@ -25,9 +25,10 @@ THE SOFTWARE.
|
||||
#include <unistd.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
@@ -36,6 +37,7 @@ THE SOFTWARE.
|
||||
#include "dummy_kernel/dummy_kernel.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
#include "util/test_assert.h"
|
||||
#include "util/xml.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
@@ -45,6 +47,9 @@ THE SOFTWARE.
|
||||
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
// Tool is unloaded
|
||||
volatile bool is_loaded = false;
|
||||
// Profiling features
|
||||
//rocprofiler_feature_t* features = NULL;
|
||||
//unsigned feature_count = 0;
|
||||
|
||||
// Error handler
|
||||
void fatal(const std::string msg) {
|
||||
@@ -72,8 +77,19 @@ struct context_entry_t {
|
||||
rocprofiler_callback_data_t data;
|
||||
};
|
||||
|
||||
// Context callback arg
|
||||
// Argument registered with the queue callbacks and received by
// dispatch_callback() as its 'arg' parameter.
struct callbacks_arg_t {
|
||||
// Array of profiling pool handles, indexed by GPU id
rocprofiler_pool_t** pools;
|
||||
};
|
||||
|
||||
// Handler callback arg
|
||||
// Argument passed to the pool completion handler; carries the feature array
// that is forwarded to dump_context_entry().
struct handler_arg_t {
|
||||
// Profiling features array
rocprofiler_feature_t* features;
|
||||
// Number of elements in 'features'
unsigned feature_count;
|
||||
};
|
||||
|
||||
// Dump stored context entry
|
||||
void dump_context_entry(context_entry_t* entry) {
|
||||
void dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features, unsigned feature_count) {
|
||||
volatile std::atomic<bool>* valid = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
|
||||
while (valid->load() == false) sched_yield();
|
||||
|
||||
@@ -97,26 +113,44 @@ void dump_context_entry(context_entry_t* entry) {
|
||||
|
||||
rocprofiler_group_t& group = entry->group;
|
||||
if (group.context == NULL) {
|
||||
fprintf(stderr, "tool error: context is NULL\n");
|
||||
abort();
|
||||
fatal("context is NULL\n");
|
||||
}
|
||||
if (feature_count > 0) {
|
||||
hsa_status_t status = rocprofiler_group_get_data(&group);
|
||||
check_status(status);
|
||||
status = rocprofiler_get_metrics(group.context);
|
||||
check_status(status);
|
||||
}
|
||||
|
||||
rocprofiler_close(group.context);
|
||||
for (unsigned i = 0; i < feature_count; ++i) {
|
||||
const rocprofiler_feature_t* p = &features[i];
|
||||
fprintf(stdout, "> %s ", p->name);
|
||||
switch (p->data.kind) {
|
||||
// Output metrics results
|
||||
case ROCPROFILER_DATA_KIND_INT64:
|
||||
fprintf(stdout, "= (%lu)\n", p->data.result_int64);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Undefined data kind(%u)\n", p->data.kind);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Profiling completion handler
|
||||
// Dump and delete the context entry
|
||||
// Return true if the context was dumped successfully
|
||||
bool context_handler(rocprofiler_group_t group, void* arg) {
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
|
||||
bool context_handler(const rocprofiler_pool_entry_t* entry, void* arg) {
|
||||
// Context entry
|
||||
context_entry_t* ctx_entry = reinterpret_cast<context_entry_t*>(entry->payload);
|
||||
handler_arg_t* handler_arg = reinterpret_cast<handler_arg_t*>(arg);
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
|
||||
dump_context_entry(entry);
|
||||
delete entry;
|
||||
dump_context_entry(ctx_entry, handler_arg->features, handler_arg->feature_count);
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
@@ -125,35 +159,65 @@ bool context_handler(rocprofiler_group_t group, void* arg) {
|
||||
|
||||
return false;
|
||||
}
|
||||
#if 0
|
||||
// Profiling completion handler
|
||||
// Dump and delete the context entry
|
||||
// Return true if the context was dumped successfully
|
||||
bool context_handler1(rocprofiler_group_t group, void* arg) {
|
||||
context_entry_t* ctx_entry = reinterpret_cast<context_entry_t*>(arg);
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
|
||||
dump_context_entry(ctx_entry, features, feature_count);
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
abort();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
// Kernel dispatch callback
|
||||
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/,
|
||||
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* arg,
|
||||
rocprofiler_group_t* group) {
|
||||
// Passed tool data
|
||||
hsa_agent_t agent = callback_data->agent;
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
|
||||
// Context entry
|
||||
context_entry_t* entry = new context_entry_t();
|
||||
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
properties.handler = context_handler;
|
||||
properties.handler_arg = (void*)entry;
|
||||
|
||||
#if 1
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(callback_data->agent, NULL, 0,
|
||||
const unsigned gpu_id = HsaRsrcFactory::Instance().GetAgentInfo(agent)->dev_index;
|
||||
callbacks_arg_t* callbacks_arg = reinterpret_cast<callbacks_arg_t*>(arg);
|
||||
rocprofiler_pool_t* pool = callbacks_arg->pools[gpu_id];
|
||||
rocprofiler_pool_entry_t pool_entry{};
|
||||
status = rocprofiler_pool_fetch(pool, &pool_entry);
|
||||
check_status(status);
|
||||
// Profiling context entry
|
||||
rocprofiler_t* context = pool_entry.context;
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(pool_entry.payload);
|
||||
#else
|
||||
// Open profiling context
|
||||
// context properties
|
||||
context_entry_t* entry = new context_entry_t{};
|
||||
rocprofiler_t* context = NULL;
|
||||
rocprofiler_properties_t properties{};
|
||||
properties.handler = context_handler1;
|
||||
properties.handler_arg = (void*)entry;
|
||||
status = rocprofiler_open(agent, features, feature_count,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
#endif
|
||||
// Get group[0]
|
||||
status = rocprofiler_get_group(context, 0, group);
|
||||
check_status(status);
|
||||
|
||||
// Fill profiling context entry
|
||||
entry->agent = callback_data->agent;
|
||||
entry->agent = agent;
|
||||
entry->group = *group;
|
||||
entry->data = *callback_data;
|
||||
entry->data.kernel_name = strdup(callback_data->kernel_name);
|
||||
@@ -162,26 +226,90 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Builds the array of profiling features used by the test.
// Stores the heap-allocated array to '*ret' and returns the number of
// elements; every element is a zero-filled ROCPROFILER_FEATURE_KIND_METRIC
// feature with only its name set.
unsigned metrics_input(rocprofiler_feature_t** ret) {
  // PMC events / metrics, in output order
  static const char* const metric_names[] = {
    "GRBM_COUNT",
    "GRBM_GUI_ACTIVE",
    "GPUBusy",
    "SQ_WAVES",
    "SQ_INSTS_VALU",
    "VALUInsts",
    "TCC_HIT_sum",
    "TCC_MISS_sum",
    "WRITE_SIZE",
  };
  const unsigned metric_count = sizeof(metric_names) / sizeof(metric_names[0]);

  // Profiling feature objects
  rocprofiler_feature_t* array = new rocprofiler_feature_t[metric_count];
  memset(array, 0, metric_count * sizeof(rocprofiler_feature_t));

  for (unsigned idx = 0; idx < metric_count; ++idx) {
    array[idx].kind = ROCPROFILER_FEATURE_KIND_METRIC;
    array[idx].name = metric_names[idx];
  }

  *ret = array;
  return metric_count;
}
|
||||
|
||||
void initialize() {
|
||||
// Getting GPU device info
|
||||
const AgentInfo* agent_info = NULL;
|
||||
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) {
|
||||
fprintf(stderr, "GetGpuAgentInfo failed\n");
|
||||
abort();
|
||||
}
|
||||
// Available GPU agents
|
||||
const unsigned gpu_count = HsaRsrcFactory::Instance().GetCountOfGpuAgents();
|
||||
|
||||
// Getting profiling features
|
||||
rocprofiler_feature_t* features = NULL;
|
||||
unsigned feature_count = metrics_input(&features);
|
||||
|
||||
// Handler arg
|
||||
handler_arg_t* handler_arg = new handler_arg_t{};
|
||||
handler_arg->features = features;
|
||||
handler_arg->feature_count = feature_count;
|
||||
|
||||
// Context properties
|
||||
rocprofiler_pool_properties_t properties{};
|
||||
properties.num_entries = 100;
|
||||
properties.payload_bytes = sizeof(context_entry_t);
|
||||
properties.handler = context_handler;
|
||||
properties.handler_arg = handler_arg;
|
||||
|
||||
// Adding dispatch observer
|
||||
callbacks_arg_t* callbacks_arg = new callbacks_arg_t{};
|
||||
callbacks_arg->pools = new rocprofiler_pool_t* [gpu_count];
|
||||
for (unsigned gpu_id = 0; gpu_id < gpu_count; gpu_id++) {
|
||||
// Getting GPU device info
|
||||
const AgentInfo* agent_info = NULL;
|
||||
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(gpu_id, &agent_info) == false) {
|
||||
fprintf(stderr, "GetGpuAgentInfo failed\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
// Open profiling pool
|
||||
rocprofiler_pool_t* pool = NULL;
|
||||
hsa_status_t status = rocprofiler_pool_open(agent_info->dev_id, features, feature_count,
|
||||
&pool, 0/*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
callbacks_arg->pools[gpu_id] = pool;
|
||||
}
|
||||
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{};
|
||||
callbacks_ptrs.dispatch = dispatch_callback;
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL);
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg);
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
// Unregister dispatch callback
|
||||
rocprofiler_remove_queue_callbacks();
|
||||
|
||||
// Dump stored profiling output data
|
||||
fflush(stdout);
|
||||
// CLose profiling pool
|
||||
#if 0
|
||||
hsa_status_t status = rocprofiler_pool_flush(pool);
|
||||
check_status(status);
|
||||
status = rocprofiler_pool_close(pool);
|
||||
check_status(status);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Tool constructor
|
||||
|
||||
@@ -22,6 +22,21 @@
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# test check routine
|
||||
test_status=0
|
||||
# Runs one test command and reports the result.
#   $1 - test label printed in the report
#   $2 - command line, executed with 'eval'
# Increments the global 'test_status' counter when the command fails.
eval_test() {
  label=$1
  cmdline=$2
  echo "$label: \"$cmdline\""
  eval "$cmdline"
  case $? in
    0)
      echo "$label: PASSED"
      ;;
    *)
      echo "$label: FAILED"
      test_status=$(($test_status + 1))
      ;;
  esac
}
|
||||
|
||||
# enable tools load failure reporting
|
||||
export HSA_TOOLS_REPORT_LOAD_FAILURE=1
|
||||
# paths to ROC profiler and other libraries
|
||||
@@ -37,12 +52,22 @@ export ROCP_METRICS=metrics.xml
|
||||
# test trace
|
||||
export ROC_TEST_TRACE=1
|
||||
|
||||
## Intercepting usage model test
|
||||
|
||||
# tool library loaded by ROC profiler
|
||||
export ROCP_TOOL_LIB=./test/libintercept_test.so
|
||||
../bin/run_tool.sh ./test/ctrl
|
||||
export ROCP_KITER=50
|
||||
export ROCP_DITER=50
|
||||
export ROCP_AGENTS=1
|
||||
export ROCP_THRS=1
|
||||
eval_test "Intercepting usage model test" "../bin/run_tool.sh ./test/ctrl"
|
||||
|
||||
## Standalone sampling usage model test
|
||||
|
||||
unset ROCP_TOOL_LIB
|
||||
eval ./test/standalone_test
|
||||
eval_test "Standalone sampling usage model test" ./test/standalone_test
|
||||
|
||||
## Libtool test
|
||||
|
||||
# tool library loaded by ROC profiler
|
||||
export ROCP_TOOL_LIB=libtool.so
|
||||
@@ -61,7 +86,9 @@ export ROCP_DITER=50
|
||||
export ROCP_AGENTS=1
|
||||
export ROCP_THRS=1
|
||||
export ROCP_INPUT=input.xml
|
||||
eval ./test/ctrl
|
||||
eval_test "'rocprof' libtool test" ./test/ctrl
|
||||
|
||||
## Libtool test, counter sets
|
||||
|
||||
# Memcopies tracking
|
||||
export ROCP_MCOPY_TRACKING=1
|
||||
@@ -69,10 +96,11 @@ export ROCP_MCOPY_TRACKING=1
|
||||
export ROCP_KITER=1
|
||||
export ROCP_DITER=4
|
||||
export ROCP_INPUT=input1.xml
|
||||
eval ./test/ctrl
|
||||
eval_test "libtool test, counter sets" ./test/ctrl
|
||||
|
||||
#valgrind --leak-check=full $tbin
|
||||
#valgrind --tool=massif $tbin
|
||||
#ms_print massif.out.<N>
|
||||
|
||||
exit 0
|
||||
if [ $test_status != 0 ] ; then echo "$test_status tests failed"; fi
|
||||
exit $test_status
|
||||
|
||||
새 이슈에서 참조
사용자 차단