diff --git a/projects/rocprofiler/bin/run_tool.sh b/projects/rocprofiler/bin/run_tool.sh index 5ee438c09b..988ca8df0a 100755 --- a/projects/rocprofiler/bin/run_tool.sh +++ b/projects/rocprofiler/bin/run_tool.sh @@ -14,13 +14,13 @@ else test_app=$* # paths to ROC profiler and oher libraries -export LD_LIBRARY_PATH=$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH export PATH=.:$PATH # ROC profiler library loaded by HSA runtime export HSA_TOOLS_LIB=librocprofiler64.so.1 # tool library loaded by ROC profiler -if [ -z $ROCP_TOOL_LIB ] ; then +if [ -z "$ROCP_TOOL_LIB" ] ; then export ROCP_TOOL_LIB=libintercept_test.so fi # enable error messages @@ -30,7 +30,9 @@ export ROCPROFILER_LOG=1 # ROC profiler metrics config file unset ROCP_PROXY_QUEUE # ROC profiler metrics config file -export ROCP_METRICS=$BIN_DIR/lib/metrics.xml +if [ -z "$ROCP_METRICS" ] ; then + export ROCP_METRICS=$BIN_DIR/lib/metrics.xml +fi LD_PRELOAD=$ROCP_TOOL_LIB $test_app fi diff --git a/projects/rocprofiler/inc/rocprofiler.h b/projects/rocprofiler/inc/rocprofiler.h index f7d559c5c7..003365ec10 100644 --- a/projects/rocprofiler/inc/rocprofiler.h +++ b/projects/rocprofiler/inc/rocprofiler.h @@ -383,6 +383,50 @@ hsa_status_t rocprofiler_queue_create_profiled( void* data, uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue); +//////////////////////////////////////////////////////////////////////////////// +// Profiling pool +// +// Support for profiling contexts pool + +// Profiling pool +typedef void rocprofiler_pool_t; + +// Profiling pool entry +typedef struct { + rocprofiler_t* context; // context object + void* payload; // payload data object +} rocprofiler_pool_entry_t; + +// Profiling handler, called on profiling completion +typedef bool (*rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t* entry, void* arg); + +// Profiling properties +typedef struct { + uint32_t num_entries; // pool size
entries + uint32_t payload_bytes; // payload size bytes + rocprofiler_pool_handler_t handler; // handler on context completion + void* handler_arg; // the handler arg +} rocprofiler_pool_properties_t; + +// Open profiling pool +hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, // GPU handle + rocprofiler_feature_t* features, // [in] profiling features array + uint32_t feature_count, // profiling info count + rocprofiler_pool_t** pool, // [out] profiling pool handle + uint32_t mode, // profiling mode mask + rocprofiler_pool_properties_t*); // pool properties + +// Close profiling pool +hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool); // profiling pool handle + +// Fetch profiling pool entry +hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t* pool, // profiling pool handle + rocprofiler_pool_entry_t* entry); // [out] empty profiling pool entry + +// Flush profiling pool +hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool); // profiling pool handle + +//////////////////////////////////////////////////////////////////////////////// #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/projects/rocprofiler/src/core/context.h b/projects/rocprofiler/src/core/context.h index a7cac87312..a59effd0d3 100644 --- a/projects/rocprofiler/src/core/context.h +++ b/projects/rocprofiler/src/core/context.h @@ -153,11 +153,20 @@ class Context { public: typedef std::map info_map_t; - static void Create(Context** context, const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, + static void Create(Context* obj, const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) { - *context = NULL; + new (obj) Context(agent_info, queue, info, info_count, handler, handler_arg); + obj->Construct(agent_info, queue, info, info_count, handler, handler_arg); + } + + static void Release(Context* obj) { obj->Destruct(); } + + static Context*
Create(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, + const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) + { Context* obj = new Context(agent_info, queue, info, info_count, handler, handler_arg); + if (obj == NULL) EXC_RAISING(HSA_STATUS_ERROR, "allocation error"); try { obj->Construct(agent_info, queue, info, info_count, handler, handler_arg); } catch(...) { @@ -165,7 +174,7 @@ class Context { obj = NULL; throw; } - *context = obj; + return obj; } static void Destroy(Context* obj) { if (obj != NULL) delete obj; } @@ -300,7 +309,9 @@ class Context { handler_arg_(handler_arg) {} - ~Context() { + ~Context() { Destruct(); } + + void Destruct() { for (const auto& v : info_map_) { const std::string& name = v.first; const rocprofiler_feature_t* info = v.second; @@ -311,7 +322,6 @@ class Context { } } - void Construct(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) { diff --git a/projects/rocprofiler/src/core/context_pool.h b/projects/rocprofiler/src/core/context_pool.h new file mode 100644 index 0000000000..3056cccccd --- /dev/null +++ b/projects/rocprofiler/src/core/context_pool.h @@ -0,0 +1,193 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef SRC_CORE_CONTEXT_POOL_H_ +#define SRC_CORE_CONTEXT_POOL_H_ + +#include "inc/rocprofiler.h" + +#include + +#include "core/context.h" + +namespace rocprofiler { +class ContextPool { + public: + typedef uint64_t index_t; + typedef std::mutex mutex_t; + + struct entry_t { + ContextPool* pool; + Context* context; + std::atomic completed; + }; + + static ContextPool* Create( + uint32_t num_entries, + uint32_t payload_bytes, + const util::AgentInfo* agent_info, + rocprofiler_feature_t* info, + const uint32_t info_count, + rocprofiler_pool_handler_t handler, + void* handler_arg) + { + ContextPool* obj = new ContextPool(num_entries, payload_bytes, agent_info, info, info_count, handler, handler_arg); + if (obj == NULL) EXC_RAISING(HSA_STATUS_ERROR, "allocation error"); + return obj; + } + + static void Destroy(ContextPool* pool) { delete pool; } + + void Fetch(rocprofiler_pool_entry_t* pool_entry) { + if (constructed_ == false) { + Construct(agent_info_, info_, info_count_); + } + const index_t write_index = write_index_.fetch_add(entry_size_bytes_, std::memory_order_relaxed); + while (write_index >= (read_index_.load(std::memory_order_acquire) + array_size_bytes_)) { + check_completed(); + std::this_thread::yield(); + } + entry_t* entry = GetPoolEntry(write_index, pool_entry); + if (entry->completed.load(std::memory_order_relaxed) != false) EXC_RAISING(HSA_STATUS_ERROR, "Corrupted pool entry"); + } + + void Flush() { + 
check_completed(); + } + + private: + static unsigned aligned64(const unsigned& size) { return (size + 0x3f) & ~0x3fu; } + + static bool context_handler(rocprofiler_group_t group, void* arg) { + entry_t* entry = reinterpret_cast(arg); + entry->completed.store(true, std::memory_order_release); + entry->pool->check_completed(); + return true; + } + + ContextPool( + uint32_t num_entries, + uint32_t payload_bytes, + const util::AgentInfo* agent_info, + rocprofiler_feature_t* info, + const uint32_t info_count, + rocprofiler_pool_handler_t pool_handler, + void* pool_handler_arg + ) : + payload_off_(aligned64(sizeof(entry_t))), + entry_size_bytes_(payload_off_ + aligned64(payload_bytes)), + array_size_bytes_(entry_size_bytes_ * num_entries), + array_(NULL), + read_index_(0), + write_index_(0), + sync_flag_(false), + + agent_info_(agent_info), + info_(info), + info_count_(info_count), + pool_handler_(pool_handler), + pool_handler_arg_(pool_handler_arg), + constructed_(false) + {} + + void Construct(const util::AgentInfo* agent_info, rocprofiler_feature_t* info, const uint32_t info_count) { + std::lock_guard lck(mutex_); + + if (constructed_ == false) { + array_data_ = (char*) malloc(array_size_bytes_ + 0x3f); + array_ = reinterpret_cast(((intptr_t)array_data_ + 0x3f) >> 6 << 6); + if (((intptr_t)array_ & 0x3f) != 0) EXC_RAISING(HSA_STATUS_ERROR, "Pool array is not aligned"); + memset(array_, 0, array_size_bytes_); + + const char* end = array_ + array_size_bytes_; + for (char* ptr = array_; ptr < end; ptr += entry_size_bytes_) { + entry_t* entry = reinterpret_cast(ptr); + entry->pool = this; + entry->context = Context::Create(agent_info, NULL, info, info_count, ContextPool::context_handler, ptr); + } + + constructed_ = true; + } + } + + ~ContextPool() { + const char* end = array_ + array_size_bytes_; + for (char* ptr = array_; ptr < end; ptr += entry_size_bytes_) { + entry_t* entry = reinterpret_cast(ptr); + Context::Destroy(entry->context); + } + free(array_); + } + + 
char* GetArrayPtr(const uint32_t& index) { return array_ + (index % array_size_bytes_); } + + entry_t* GetPoolEntry(const uint32_t& index, rocprofiler_pool_entry_t* pool_entry) { + char* ptr = GetArrayPtr(index); + entry_t* entry = reinterpret_cast(ptr); + void* payload = ptr + payload_off_; + *pool_entry = rocprofiler_pool_entry_t{}; + pool_entry->context = reinterpret_cast(entry->context); + pool_entry->payload = payload; + return entry; + } + + void check_completed() { + if (sync_flag_.test_and_set(std::memory_order_acquire) == false) { + index_t read_index = read_index_.load(std::memory_order_relaxed); + const index_t write_index = write_index_.load(std::memory_order_relaxed); + while(read_index < write_index) { + rocprofiler_pool_entry_t pool_entry{}; + entry_t* entry = GetPoolEntry(read_index, &pool_entry); + if (entry->completed.load(std::memory_order_acquire) == true) { + pool_handler_(&pool_entry, pool_handler_arg_); + entry->completed.store(false, std::memory_order_relaxed); + read_index += entry_size_bytes_; + read_index_.store(read_index, std::memory_order_release); + } else { + break; + } + } + sync_flag_.clear(std::memory_order_release); + } + } + + const uint32_t payload_off_; + const uint32_t entry_size_bytes_; + const uint32_t array_size_bytes_; + char* array_data_; + char* array_; + volatile std::atomic read_index_; + volatile std::atomic write_index_; + volatile std::atomic_flag sync_flag_; + + const util::AgentInfo* agent_info_; + rocprofiler_feature_t* info_; + const uint32_t info_count_; + rocprofiler_pool_handler_t pool_handler_; + void* pool_handler_arg_; + + bool constructed_; + mutex_t mutex_; +}; +} // namespace rocprofiler + +#endif // SRC_CORE_CONTEXT_POOL_H_ diff --git a/projects/rocprofiler/src/core/rocprofiler.cpp b/projects/rocprofiler/src/core/rocprofiler.cpp index d651136f0e..8eed0fcda7 100644 --- a/projects/rocprofiler/src/core/rocprofiler.cpp +++ b/projects/rocprofiler/src/core/rocprofiler.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. 
#include #include "core/context.h" +#include "core/context_pool.h" #include "core/hsa_queue.h" #include "core/intercept_queue.h" #include "core/proxy_queue.h" @@ -477,8 +478,8 @@ PUBLIC_API hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_ } rocprofiler::Context** context_ret = reinterpret_cast(handle); - rocprofiler::Context::Create(context_ret, agent_info, queue, features, feature_count, properties->handler, - properties->handler_arg); + *context_ret = rocprofiler::Context::Create(agent_info, queue, features, feature_count, + properties->handler, properties->handler_arg); API_METHOD_SUFFIX } @@ -608,6 +609,64 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data( API_METHOD_SUFFIX } +//////////////////////////////////////////////////////////////////////////////// +// Open profiling pool +PUBLIC_API hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, // GPU handle + rocprofiler_feature_t* features, // [in] profiling features array + uint32_t feature_count, // profiling info count + rocprofiler_pool_t** pool, // [out] profiling pool handle + uint32_t mode, // profiling mode mask + rocprofiler_pool_properties_t* properties) // pool properties +{ + API_METHOD_PREFIX + rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); + const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent); + if (agent_info == NULL) { + EXC_RAISING(HSA_STATUS_ERROR, "agent is not found"); + } + + rocprofiler::ContextPool* obj = rocprofiler::ContextPool::Create( + properties->num_entries, + properties->payload_bytes, + agent_info, + features, + feature_count, + properties->handler, + properties->handler_arg + ); + *pool = reinterpret_cast(obj); + API_METHOD_SUFFIX +} + +// Close profiling pool +PUBLIC_API hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool) // profiling pool handle +{ + API_METHOD_PREFIX + rocprofiler::ContextPool* obj = reinterpret_cast(pool); + rocprofiler::ContextPool::Destroy(obj); +
API_METHOD_SUFFIX +} + +// Fetch profiling pool entry +PUBLIC_API hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t* pool, // profiling pool handle + rocprofiler_pool_entry_t* entry) // [out] empty profiling pool entry +{ + API_METHOD_PREFIX + rocprofiler::ContextPool* context_pool = reinterpret_cast(pool); + context_pool->Fetch(entry); + API_METHOD_SUFFIX +} + +// Flush profiling pool +PUBLIC_API hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool) // profiling pool handle +{ + API_METHOD_PREFIX + rocprofiler::ContextPool* context_pool = reinterpret_cast(pool); + context_pool->Flush(); + API_METHOD_SUFFIX +} + +//////////////////////////////////////////////////////////////////////////////// // Return the info for a given info kind PUBLIC_API hsa_status_t rocprofiler_get_info( const hsa_agent_t *agent, diff --git a/projects/rocprofiler/test/app/intercept_test.cpp b/projects/rocprofiler/test/app/intercept_test.cpp index 87e00d6468..876b31020f 100644 --- a/projects/rocprofiler/test/app/intercept_test.cpp +++ b/projects/rocprofiler/test/app/intercept_test.cpp @@ -25,9 +25,10 @@ THE SOFTWARE. #include #include -#include -#include #include +#include +#include +#include #include "ctrl/run_kernel.h" #include "ctrl/test_aql.h" @@ -36,6 +37,7 @@ THE SOFTWARE. #include "dummy_kernel/dummy_kernel.h" #include "simple_convolution/simple_convolution.h" #include "util/test_assert.h" +#include "util/xml.h" #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) @@ -45,6 +47,9 @@ THE SOFTWARE.
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; // Tool is unloaded volatile bool is_loaded = false; +// Profiling features +//rocprofiler_feature_t* features = NULL; +//unsigned feature_count = 0; // Error handler void fatal(const std::string msg) { @@ -72,8 +77,19 @@ struct context_entry_t { rocprofiler_callback_data_t data; }; +// Context callback arg +struct callbacks_arg_t { + rocprofiler_pool_t** pools; +}; + +// Handler callback arg +struct handler_arg_t { + rocprofiler_feature_t* features; + unsigned feature_count; +}; + // Dump stored context entry -void dump_context_entry(context_entry_t* entry) { +void dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features, unsigned feature_count) { volatile std::atomic* valid = reinterpret_cast*>(&entry->valid); while (valid->load() == false) sched_yield(); @@ -97,26 +113,44 @@ void dump_context_entry(context_entry_t* entry) { rocprofiler_group_t& group = entry->group; if (group.context == NULL) { - fprintf(stderr, "tool error: context is NULL\n"); - abort(); + fatal("context is NULL\n"); + } + if (feature_count > 0) { + hsa_status_t status = rocprofiler_group_get_data(&group); + check_status(status); + status = rocprofiler_get_metrics(group.context); + check_status(status); } - rocprofiler_close(group.context); + for (unsigned i = 0; i < feature_count; ++i) { + const rocprofiler_feature_t* p = &features[i]; + fprintf(stdout, "> %s ", p->name); + switch (p->data.kind) { + // Output metrics results + case ROCPROFILER_DATA_KIND_INT64: + fprintf(stdout, "= (%lu)\n", p->data.result_int64); + break; + default: + fprintf(stderr, "Undefined data kind(%u)\n", p->data.kind); + abort(); + } + } } // Profiling completion handler // Dump and delete the context entry // Return true if the context was dumped successfully -bool context_handler(rocprofiler_group_t group, void* arg) { - context_entry_t* entry = reinterpret_cast(arg); +bool context_handler(const rocprofiler_pool_entry_t* entry, void* 
arg) { + // Context entry + context_entry_t* ctx_entry = reinterpret_cast(entry->payload); + handler_arg_t* handler_arg = reinterpret_cast(arg); if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); abort(); } - dump_context_entry(entry); - delete entry; + dump_context_entry(ctx_entry, handler_arg->features, handler_arg->feature_count); if (pthread_mutex_unlock(&mutex) != 0) { perror("pthread_mutex_unlock"); @@ -125,35 +159,65 @@ bool context_handler(rocprofiler_group_t group, void* arg) { return false; } +#if 0 +// Profiling completion handler +// Dump and delete the context entry +// Return true if the context was dumped successfully +bool context_handler1(rocprofiler_group_t group, void* arg) { + context_entry_t* ctx_entry = reinterpret_cast(arg); + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + abort(); + } + + dump_context_entry(ctx_entry, features, feature_count); + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + abort(); + } + + return false; +} +#endif // Kernel disoatch callback -hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/, +hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* arg, rocprofiler_group_t* group) { + // Passed tool data + hsa_agent_t agent = callback_data->agent; // HSA status hsa_status_t status = HSA_STATUS_ERROR; - // Profiling context - rocprofiler_t* context = NULL; - - // Context entry - context_entry_t* entry = new context_entry_t(); - - // context properties - rocprofiler_properties_t properties{}; - properties.handler = context_handler; - properties.handler_arg = (void*)entry; - +#if 1 // Open profiling context - status = rocprofiler_open(callback_data->agent, NULL, 0, + const unsigned gpu_id = HsaRsrcFactory::Instance().GetAgentInfo(agent)->dev_index; + callbacks_arg_t* callbacks_arg = reinterpret_cast(arg); + rocprofiler_pool_t* pool = callbacks_arg->pools[gpu_id]; + 
rocprofiler_pool_entry_t pool_entry{}; + status = rocprofiler_pool_fetch(pool, &pool_entry); + check_status(status); + // Profiling context entry + rocprofiler_t* context = pool_entry.context; + context_entry_t* entry = reinterpret_cast(pool_entry.payload); +#else + // Open profiling context + // context properties + context_entry_t* entry = new context_entry_t{}; + rocprofiler_t* context = NULL; + rocprofiler_properties_t properties{}; + properties.handler = context_handler1; + properties.handler_arg = (void*)entry; + status = rocprofiler_open(agent, features, feature_count, &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); check_status(status); - +#endif // Get group[0] status = rocprofiler_get_group(context, 0, group); check_status(status); // Fill profiling context entry - entry->agent = callback_data->agent; + entry->agent = agent; entry->group = *group; entry->data = *callback_data; entry->data.kernel_name = strdup(callback_data->kernel_name); @@ -162,26 +226,90 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, return HSA_STATUS_SUCCESS; } +unsigned metrics_input(rocprofiler_feature_t** ret) { + // Profiling feature objects + const unsigned feature_count = 9; + rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; + memset(features, 0, feature_count * sizeof(rocprofiler_feature_t)); + + // PMC events + features[0].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[0].name = "GRBM_COUNT"; + features[1].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[1].name = "GRBM_GUI_ACTIVE"; + features[2].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[2].name = "GPUBusy"; + features[3].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[3].name = "SQ_WAVES"; + features[4].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[4].name = "SQ_INSTS_VALU"; + features[5].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[5].name = "VALUInsts"; + features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC; + 
features[6].name = "TCC_HIT_sum"; + features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[7].name = "TCC_MISS_sum"; + features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; + features[8].name = "WRITE_SIZE"; + + *ret = features; + return feature_count; +} + void initialize() { - // Getting GPU device info - const AgentInfo* agent_info = NULL; - if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) { - fprintf(stderr, "GetGpuAgentInfo failed\n"); - abort(); - } + // Available GPU agents + const unsigned gpu_count = HsaRsrcFactory::Instance().GetCountOfGpuAgents(); + + // Getting profiling features + rocprofiler_feature_t* features = NULL; + unsigned feature_count = metrics_input(&features); + + // Handler arg + handler_arg_t* handler_arg = new handler_arg_t{}; + handler_arg->features = features; + handler_arg->feature_count = feature_count; + + // Context properties + rocprofiler_pool_properties_t properties{}; + properties.num_entries = 100; + properties.payload_bytes = sizeof(context_entry_t); + properties.handler = context_handler; + properties.handler_arg = handler_arg; // Adding dispatch observer + callbacks_arg_t* callbacks_arg = new callbacks_arg_t{}; + callbacks_arg->pools = new rocprofiler_pool_t* [gpu_count]; + for (unsigned gpu_id = 0; gpu_id < gpu_count; gpu_id++) { + // Getting GPU device info + const AgentInfo* agent_info = NULL; + if (HsaRsrcFactory::Instance().GetGpuAgentInfo(gpu_id, &agent_info) == false) { + fprintf(stderr, "GetGpuAgentInfo failed\n"); + abort(); + } + + // Open profiling pool + rocprofiler_pool_t* pool = NULL; + hsa_status_t status = rocprofiler_pool_open(agent_info->dev_id, features, feature_count, + &pool, 0/*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); + check_status(status); + callbacks_arg->pools[gpu_id] = pool; + } + rocprofiler_queue_callbacks_t callbacks_ptrs{}; callbacks_ptrs.dispatch = dispatch_callback; - rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL); + 
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg); } void cleanup() { // Unregister dispatch callback rocprofiler_remove_queue_callbacks(); - - // Dump stored profiling output data - fflush(stdout); + // Close profiling pool +#if 0 + hsa_status_t status = rocprofiler_pool_flush(pool); + check_status(status); + status = rocprofiler_pool_close(pool); + check_status(status); +#endif } // Tool constructor diff --git a/projects/rocprofiler/test/run.sh b/projects/rocprofiler/test/run.sh index 580f47132f..6f6d1f88e5 100755 --- a/projects/rocprofiler/test/run.sh +++ b/projects/rocprofiler/test/run.sh @@ -22,6 +22,21 @@ # THE SOFTWARE. ################################################################################ +# test check routine +test_status=0 +eval_test() { + label=$1 + cmdline=$2 + echo "$label: \"$cmdline\"" + eval "$cmdline" + if [ $? != 0 ] ; then + echo "$label: FAILED" + test_status=$(($test_status + 1)) + else + echo "$label: PASSED" + fi +} + # enable tools load failure reporting export HSA_TOOLS_REPORT_LOAD_FAILURE=1 # paths to ROC profiler and oher libraries @@ -37,12 +52,22 @@ export ROCP_METRICS=metrics.xml # test trace export ROC_TEST_TRACE=1 +## Intercepting usage model test + # tool library loaded by ROC profiler export ROCP_TOOL_LIB=./test/libintercept_test.so -../bin/run_tool.sh ./test/ctrl +export ROCP_KITER=50 +export ROCP_DITER=50 +export ROCP_AGENTS=1 +export ROCP_THRS=1 +eval_test "Intercepting usage model test" "../bin/run_tool.sh ./test/ctrl" + +## Standalone sampling usage model test unset ROCP_TOOL_LIB -eval ./test/standalone_test +eval_test "Standalone sampling usage model test" ./test/standalone_test + +## Libtool test # tool library loaded by ROC profiler export ROCP_TOOL_LIB=libtool.so @@ -61,7 +86,9 @@ export ROCP_DITER=50 export ROCP_AGENTS=1 export ROCP_THRS=1 export ROCP_INPUT=input.xml -eval ./test/ctrl +eval_test "'rocprof' libtool test" ./test/ctrl + +## Libtool test, counter sets # Memcopies tracking export
ROCP_MCOPY_TRACKING=1 @@ -69,10 +96,11 @@ export ROCP_MCOPY_TRACKING=1 export ROCP_KITER=1 export ROCP_DITER=4 export ROCP_INPUT=input1.xml -eval ./test/ctrl +eval_test "libtool test, counter sets" ./test/ctrl #valgrind --leak-check=full $tbin #valgrind --tool=massif $tbin #ms_print massif.out. -exit 0 +if [ $test_status != 0 ] ; then echo "$test_status tests failed"; fi +exit $test_status