dispatch data th-id, queue-id, kernel-object
Change-Id: Ic5593603c6587cad17be33618d011a9aefc648ab
This commit is contained in:
Executable
+36
@@ -0,0 +1,36 @@
|
||||
#!/bin/sh
# Launcher: runs the given command line with the ROC profiler environment set up
# and the tool library preloaded.
# Usage: run_tool.sh <cmd line>

# Absolute directory of this script; '&&' keeps a failed 'cd' from silently
# falling back to the current directory.
BIN_DIR=$(dirname "$0")
BIN_DIR=$(cd "$BIN_DIR" && pwd)
# Package root: strip only a trailing "/bin" component, so a "/bin" that occurs
# earlier in the path (e.g. /usr/bin/pkg/bin) is left untouched.
PKG_DIR=${BIN_DIR%/bin}
BIN_DIR=$PKG_DIR/bin

# PATH to custom HSA libs
HSA_PATH=$PKG_DIR/lib/hsa

if [ -z "$1" ] ; then
  echo "Usage: $0 <cmd line>"
else
  # paths to ROC profiler and other libraries
  export LD_LIBRARY_PATH="$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH"
  export PATH=".:$PATH"

  # ROC profiler library loaded by HSA runtime
  export HSA_TOOLS_LIB=librocprofiler64.so.1
  # tool library loaded by ROC profiler; a value already set by the caller wins
  if [ -z "$ROCP_TOOL_LIB" ] ; then
    export ROCP_TOOL_LIB=libintercept_test.so
  fi
  # enable error messages
  export HSA_TOOLS_REPORT_LOAD_FAILURE=1
  export HSA_VEN_AMD_AQLPROFILE_LOG=1
  export ROCPROFILER_LOG=1
  # drop any ROCP_PROXY_QUEUE value inherited from the caller's environment
  unset ROCP_PROXY_QUEUE
  # ROC profiler metrics config file
  # NOTE(review): BIN_DIR is $PKG_DIR/bin, so this resolves to
  # $PKG_DIR/bin/lib/metrics.xml while the libraries above live under
  # $PKG_DIR/lib - confirm the intended location.
  export ROCP_METRICS="$BIN_DIR/lib/metrics.xml"

  # Run the command with the tool library preloaded; "$@" keeps every argument
  # intact, including arguments that contain whitespace.
  LD_PRELOAD="$ROCP_TOOL_LIB" "$@"
fi
|
||||
+4
-1
@@ -46,7 +46,7 @@ THE SOFTWARE.
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define ROCPROFILER_VERSION_MAJOR 3
|
||||
#define ROCPROFILER_VERSION_MAJOR 4
|
||||
#define ROCPROFILER_VERSION_MINOR 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -217,8 +217,11 @@ typedef struct {
|
||||
uint32_t agent_index; // GPU index
|
||||
const hsa_queue_t* queue; // HSA queue
|
||||
uint64_t queue_index; // Index in the queue
|
||||
uint32_t queue_id; // Queue id
|
||||
const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet
|
||||
const char* kernel_name; // Kernel name
|
||||
uint64_t kernel_object; // Kernel object pointer
|
||||
int64_t thread_id; // Thread id
|
||||
const rocprofiler_dispatch_record_t* record; // Dispatch record
|
||||
} rocprofiler_callback_data_t;
|
||||
|
||||
|
||||
@@ -37,5 +37,6 @@ const char* InterceptQueue::kernel_none_ = "";
|
||||
Tracker* InterceptQueue::tracker_ = NULL;
|
||||
bool InterceptQueue::tracker_on_ = false;
|
||||
bool InterceptQueue::in_constr_call_ = false;
|
||||
InterceptQueue::queue_id_t InterceptQueue::current_queue_id = 0;
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -26,6 +26,7 @@ THE SOFTWARE.
|
||||
#include <amd_hsa_kernel_code.h>
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
@@ -49,6 +50,7 @@ class InterceptQueue {
|
||||
typedef std::map<uint64_t, InterceptQueue*> obj_map_t;
|
||||
typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data);
|
||||
typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg);
|
||||
typedef uint32_t queue_id_t;
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
@@ -79,6 +81,8 @@ class InterceptQueue {
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
status = proxy->SetInterceptCB(OnSubmitCB, obj);
|
||||
obj->queue_event_callback_ = callback;
|
||||
obj->queue_id = current_queue_id;
|
||||
++current_queue_id;
|
||||
|
||||
in_constr_call_ = false;
|
||||
return status;
|
||||
@@ -139,13 +143,17 @@ class InterceptQueue {
|
||||
}
|
||||
|
||||
// Preparing dispatch callback data
|
||||
const char* kernel_name = GetKernelName(dispatch_packet);
|
||||
uint64_t kernel_symbol = GetKernelSymbol(dispatch_packet);
|
||||
const char* kernel_name = GetKernelName(kernel_symbol);
|
||||
rocprofiler_callback_data_t data = {obj->agent_info_->dev_id,
|
||||
obj->agent_info_->dev_index,
|
||||
obj->queue_,
|
||||
user_que_idx,
|
||||
obj->queue_id,
|
||||
dispatch_packet,
|
||||
kernel_name,
|
||||
kernel_symbol,
|
||||
syscall(__NR_gettid),
|
||||
(tracker_entry) ? tracker_entry->record : NULL};
|
||||
|
||||
// Calling dispatch callback
|
||||
@@ -222,7 +230,7 @@ class InterceptQueue {
|
||||
return static_cast<hsa_packet_type_t>((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask);
|
||||
}
|
||||
|
||||
static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
static uint64_t GetKernelSymbol(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
const amd_kernel_code_t* kernel_code = NULL;
|
||||
hsa_status_t status =
|
||||
util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
|
||||
@@ -231,8 +239,12 @@ class InterceptQueue {
|
||||
if (HSA_STATUS_SUCCESS != status) {
|
||||
kernel_code = reinterpret_cast<amd_kernel_code_t*>(dispatch_packet->kernel_object);
|
||||
}
|
||||
amd_runtime_loader_debug_info_t* dbg_info = reinterpret_cast<amd_runtime_loader_debug_info_t*>(
|
||||
kernel_code->runtime_loader_kernel_symbol);
|
||||
return kernel_code->runtime_loader_kernel_symbol;
|
||||
}
|
||||
|
||||
static const char* GetKernelName(const uint64_t kernel_symbol) {
|
||||
amd_runtime_loader_debug_info_t* dbg_info =
|
||||
reinterpret_cast<amd_runtime_loader_debug_info_t*>(kernel_symbol);
|
||||
const char* kernel_name = (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
|
||||
|
||||
// Kernel name is mangled name
|
||||
@@ -298,11 +310,13 @@ class InterceptQueue {
|
||||
static Tracker* tracker_;
|
||||
static bool tracker_on_;
|
||||
static bool in_constr_call_;
|
||||
static queue_id_t current_queue_id;
|
||||
|
||||
hsa_queue_t* const queue_;
|
||||
ProxyQueue* const proxy_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
queue_event_callback_t queue_event_callback_;
|
||||
queue_id_t queue_id;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
+14
-2
@@ -37,11 +37,18 @@ file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
|
||||
|
||||
## Standalone test sources
|
||||
set ( STEXE_NAME "standalone_test" )
|
||||
set ( STST_SRC
|
||||
set ( STTST_SRC
|
||||
${TEST_DIR}/app/standalone_test.cpp
|
||||
${TEST_DIR}/ctrl/test_hsa.cpp
|
||||
)
|
||||
|
||||
## Intercept test sources
|
||||
set ( INEXE_NAME "intercept_test" )
|
||||
set ( INTST_SRC
|
||||
${TEST_DIR}/app/intercept_test.cpp
|
||||
${TEST_DIR}/ctrl/test_hsa.cpp
|
||||
)
|
||||
|
||||
## Test control sources
|
||||
set ( CTRL_SRC
|
||||
${TEST_DIR}/app/test.cpp
|
||||
@@ -58,10 +65,15 @@ set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
## Building standalone test executable
|
||||
add_executable ( ${STEXE_NAME} ${STST_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
add_executable ( ${STEXE_NAME} ${STTST_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
target_include_directories ( ${STEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${STEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
|
||||
|
||||
## Building intercept test executable
|
||||
add_library ( ${INEXE_NAME} SHARED ${INTST_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
target_include_directories ( ${INEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${INEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
|
||||
|
||||
## Building ctrl test executable
|
||||
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
|
||||
@@ -0,0 +1,231 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include <hsa.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "dummy_kernel/dummy_kernel.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
// Dispatch callbacks and context handlers synchronization
|
||||
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
// True while the tool is loaded (set in OnLoadToolProp, cleared in OnUnloadTool)
|
||||
volatile bool is_loaded = false;
|
||||
|
||||
// Error handler.
// Flushes pending stdout output, writes 'msg' followed by an empty line to
// stderr, flushes stderr and terminates the process with abort().
void fatal(const std::string msg) {
  const char* const text = msg.c_str();

  fflush(stdout);
  fprintf(stderr, "%s\n\n", text);
  fflush(stderr);

  abort();
}
|
||||
|
||||
// Check returned HSA API status
|
||||
void check_status(hsa_status_t status) {
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* error_string = NULL;
|
||||
rocprofiler_error_string(&error_string);
|
||||
fprintf(stderr, "ERROR: %s\n", error_string);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Context stored entry type
|
||||
struct context_entry_t {
|
||||
bool valid;
|
||||
hsa_agent_t agent;
|
||||
rocprofiler_group_t group;
|
||||
rocprofiler_callback_data_t data;
|
||||
};
|
||||
|
||||
// Dump stored context entry
|
||||
void dump_context_entry(context_entry_t* entry) {
|
||||
volatile std::atomic<bool>* valid = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
|
||||
while (valid->load() == false) sched_yield();
|
||||
|
||||
const std::string kernel_name = entry->data.kernel_name;
|
||||
const rocprofiler_dispatch_record_t* record = entry->data.record;
|
||||
|
||||
fflush(stdout);
|
||||
fprintf(stdout, "kernel symbol(0x%lx) name(\"%s\") tid(%ld) queue-id(%u) gpu-id(%u) ",
|
||||
entry->data.kernel_object,
|
||||
kernel_name.c_str(),
|
||||
entry->data.thread_id,
|
||||
entry->data.queue_id,
|
||||
HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index);
|
||||
if (record) fprintf(stdout, "time(%lu,%lu,%lu,%lu)",
|
||||
record->dispatch,
|
||||
record->begin,
|
||||
record->end,
|
||||
record->complete);
|
||||
fprintf(stdout, "\n");
|
||||
fflush(stdout);
|
||||
|
||||
rocprofiler_group_t& group = entry->group;
|
||||
if (group.context == NULL) {
|
||||
fprintf(stderr, "tool error: context is NULL\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
rocprofiler_close(group.context);
|
||||
}
|
||||
|
||||
// Profiling completion handler
|
||||
// Dump and delete the context entry
|
||||
// Return true if the context was dumped successfully
|
||||
bool context_handler(rocprofiler_group_t group, void* arg) {
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
|
||||
dump_context_entry(entry);
|
||||
delete entry;
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
abort();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Kernel disoatch callback
|
||||
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/,
|
||||
rocprofiler_group_t* group) {
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
|
||||
// Context entry
|
||||
context_entry_t* entry = new context_entry_t();
|
||||
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
properties.handler = context_handler;
|
||||
properties.handler_arg = (void*)entry;
|
||||
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(callback_data->agent, NULL, 0,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
// Get group[0]
|
||||
status = rocprofiler_get_group(context, 0, group);
|
||||
check_status(status);
|
||||
|
||||
// Fill profiling context entry
|
||||
entry->agent = callback_data->agent;
|
||||
entry->group = *group;
|
||||
entry->data = *callback_data;
|
||||
entry->data.kernel_name = strdup(callback_data->kernel_name);
|
||||
reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
// Getting GPU device info
|
||||
const AgentInfo* agent_info = NULL;
|
||||
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) {
|
||||
fprintf(stderr, "GetGpuAgentInfo failed\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
// Adding dispatch observer
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{};
|
||||
callbacks_ptrs.dispatch = dispatch_callback;
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL);
|
||||
}
|
||||
|
||||
// Tool cleanup: unregisters the queue callbacks and flushes whatever output is
// still buffered on stdout.
void cleanup() {
  // Stop receiving dispatch callbacks
  rocprofiler_remove_queue_callbacks();

  // Push out buffered profiling output
  fflush(stdout);
}
|
||||
|
||||
// Tool constructor
|
||||
extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
|
||||
{
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
if (is_loaded) return;
|
||||
is_loaded = true;
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
abort();
|
||||
}
|
||||
|
||||
// Enable timestamping
|
||||
settings->timestamp_on = true;
|
||||
|
||||
// Initialize profiling
|
||||
initialize();
|
||||
}
|
||||
|
||||
// Tool destructor
|
||||
extern "C" PUBLIC_API void OnUnloadTool() {
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
if (!is_loaded) return;
|
||||
is_loaded = false;
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
abort();
|
||||
}
|
||||
|
||||
// Final resources cleanup
|
||||
cleanup();
|
||||
}
|
||||
|
||||
// Library constructor: prints a marker line to stdout and flushes it.
extern "C" CONSTRUCTOR_API void constructor() {
  printf("INTT constructor\n");
  fflush(stdout);
}
|
||||
|
||||
// Library destructor: unloads the tool if it is still marked as loaded.
extern "C" DESTRUCTOR_API void destructor() {
  if (is_loaded) {
    OnUnloadTool();
  }
}
|
||||
@@ -0,0 +1,189 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include <hsa.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "dummy_kernel/dummy_kernel.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
// Dispatch callbacks and context handlers synchronization
|
||||
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
|
||||
// Error handler.
// Flushes pending stdout output, writes 'msg' followed by an empty line to
// stderr, flushes stderr and terminates the process with abort().
void fatal(const std::string msg) {
  const char* const text = msg.c_str();

  fflush(stdout);
  fprintf(stderr, "%s\n\n", text);
  fflush(stderr);

  abort();
}
|
||||
|
||||
// Check returned HSA API status
|
||||
void check_status(hsa_status_t status) {
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* error_string = NULL;
|
||||
rocprofiler_error_string(&error_string);
|
||||
fprintf(stderr, "ERROR: %s\n", error_string);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Context stored entry type
|
||||
struct context_entry_t {
|
||||
bool valid;
|
||||
hsa_agent_t agent;
|
||||
rocprofiler_group_t group;
|
||||
rocprofiler_callback_data_t data;
|
||||
};
|
||||
|
||||
// Dump stored context entry
|
||||
void dump_context_entry(context_entry_t* entry) {
|
||||
volatile std::atomic<bool>* valid = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
|
||||
while (valid->load() == false) sched_yield();
|
||||
|
||||
const std::string kernel_name = entry->data.kernel_name;
|
||||
const rocprofiler_dispatch_record_t* record = entry->data.record;
|
||||
|
||||
fflush(stdout);
|
||||
fprintf(stdout, "kernel symbol(0x%lx) name(\"%s\")", entry->data.kernel_object, kernel_name.c_str());
|
||||
if (record) fprintf(stdout, ", gpu-id(%u), time(%lu,%lu,%lu,%lu)",
|
||||
HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
record->dispatch,
|
||||
record->begin,
|
||||
record->end,
|
||||
record->complete);
|
||||
fprintf(stdout, "\n");
|
||||
fflush(stdout);
|
||||
|
||||
rocprofiler_group_t& group = entry->group;
|
||||
if (group.context == NULL) {
|
||||
fprintf(stderr, "tool error: context is NULL\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
rocprofiler_close(group.context);
|
||||
}
|
||||
|
||||
// Profiling completion handler
|
||||
// Dump and delete the context entry
|
||||
// Return true if the context was dumped successfully
|
||||
bool context_handler(rocprofiler_group_t group, void* arg) {
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
|
||||
dump_context_entry(entry);
|
||||
delete entry;
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
abort();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Kernel disoatch callback
|
||||
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/,
|
||||
rocprofiler_group_t* group) {
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
|
||||
// Context entry
|
||||
context_entry_t* entry = new context_entry_t();
|
||||
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
properties.handler = context_handler;
|
||||
properties.handler_arg = (void*)entry;
|
||||
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(callback_data->agent, NULL, 0,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
// Get group[0]
|
||||
status = rocprofiler_get_group(context, 0, group);
|
||||
check_status(status);
|
||||
|
||||
// Fill profiling context entry
|
||||
entry->agent = callback_data->agent;
|
||||
entry->group = *group;
|
||||
entry->data = *callback_data;
|
||||
entry->data.kernel_name = strdup(callback_data->kernel_name);
|
||||
reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
int main() {
|
||||
bool ret_val = false;
|
||||
const char* kiter_s = getenv("ROCP_KITER");
|
||||
const char* diter_s = getenv("ROCP_DITER");
|
||||
const unsigned kiter = (kiter_s != NULL) ? atol(kiter_s) : 1;
|
||||
const unsigned diter = (diter_s != NULL) ? atol(diter_s) : 1;
|
||||
|
||||
// Instantiate HSA resources
|
||||
HsaRsrcFactory::Create();
|
||||
|
||||
// Getting GPU device info
|
||||
const AgentInfo* agent_info = NULL;
|
||||
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) abort();
|
||||
|
||||
// Creating the queue
|
||||
hsa_queue_t* queue = NULL;
|
||||
if (HsaRsrcFactory::Instance().CreateQueue(agent_info, 128, &queue) == false) abort();
|
||||
|
||||
// Adding dispatch observer
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{};
|
||||
callbacks_ptrs.dispatch = dispatch_callback;
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL);
|
||||
|
||||
// Test initialization
|
||||
TestHsa::SetQueue(queue);
|
||||
TestHsa::HsaInstantiate(0);
|
||||
|
||||
for (unsigned ind = 0; ind < kiter; ++ind) {
|
||||
printf("Iterastion %u:\n", ind);
|
||||
ret_val = RunKernel<DummyKernel, TestAql>(0, NULL, diter);
|
||||
if (ret_val) ret_val = RunKernel<SimpleConvolution, TestAql>(0, NULL, diter);
|
||||
}
|
||||
|
||||
TestHsa::HsaShutdown();
|
||||
|
||||
return (ret_val) ? 0 : 1;
|
||||
}
|
||||
@@ -29,6 +29,9 @@ THE SOFTWARE.
|
||||
template <class Kernel, class Test> bool RunKernel(int argc = 0, char* argv[] = NULL, int count = 1) {
|
||||
bool ret_val = false;
|
||||
|
||||
if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL);
|
||||
|
||||
|
||||
// Create test kernel object
|
||||
Kernel test_kernel;
|
||||
TestAql* test_aql = new TestHsa(&test_kernel);
|
||||
|
||||
+12
-10
@@ -50,15 +50,6 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
|
||||
return NULL;
|
||||
}
|
||||
std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl;
|
||||
|
||||
// Create an instance of Aql Queue
|
||||
if (hsa_queue_ == NULL) {
|
||||
uint32_t num_pkts = 128;
|
||||
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
|
||||
hsa_queue_ = NULL;
|
||||
TEST_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
return hsa_rsrc_;
|
||||
}
|
||||
@@ -74,6 +65,15 @@ void TestHsa::HsaShutdown() {
|
||||
bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
|
||||
std::clog << "TestHsa::Initialize :" << std::endl;
|
||||
|
||||
// Create an instance of Aql Queue
|
||||
if (hsa_queue_ == NULL) {
|
||||
uint32_t num_pkts = 128;
|
||||
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
|
||||
hsa_queue_ = NULL;
|
||||
TEST_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate a Timer object
|
||||
setup_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
@@ -222,7 +222,7 @@ bool TestHsa::Run() {
|
||||
// Submit AQL packet to the queue
|
||||
const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql);
|
||||
|
||||
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl;
|
||||
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush;
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
|
||||
@@ -283,5 +283,7 @@ void TestHsa::PrintTime() {
|
||||
bool TestHsa::Cleanup() {
|
||||
hsa_executable_destroy(hsa_exec_);
|
||||
hsa_signal_destroy(hsa_signal_);
|
||||
hsa_queue_destroy(hsa_queue_);
|
||||
hsa_queue_ = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
+13
-5
@@ -32,9 +32,17 @@ export ROCPROFILER_LOG=1
|
||||
unset ROCP_PROXY_QUEUE
|
||||
# ROC profiler metrics config file
|
||||
export ROCP_METRICS=metrics.xml
|
||||
# test trace
|
||||
export ROC_TEST_TRACE=1
|
||||
|
||||
# tool library loaded by ROC profiler
|
||||
export ROCP_TOOL_LIB=./test/libintercept_test.so
|
||||
../bin/run_tool.sh ./test/ctrl
|
||||
|
||||
unset ROCP_TOOL_LIB
|
||||
eval ./test/standalone_test
|
||||
|
||||
|
||||
# tool library loaded by ROC profiler
|
||||
export ROCP_TOOL_LIB=libtool.so
|
||||
# ROC profiler kernels timing
|
||||
@@ -47,16 +55,16 @@ if [ ! -e $ROCP_TOOL_LIB ] ; then
|
||||
export ROCP_TOOL_LIB=test/libtool.so
|
||||
fi
|
||||
|
||||
export ROCP_KITER=100
|
||||
export ROCP_DITER=100
|
||||
export ROCP_INPUT=input.xml
|
||||
eval ./test/ctrl
|
||||
|
||||
export ROCP_KITER=1
|
||||
export ROCP_DITER=4
|
||||
export ROCP_INPUT=input1.xml
|
||||
eval ./test/ctrl
|
||||
|
||||
export ROCP_KITER=100
|
||||
export ROCP_DITER=100
|
||||
export ROCP_INPUT=input.xml
|
||||
eval ./test/ctrl
|
||||
|
||||
#valgrind --leak-check=full $tbin
|
||||
#valgrind --tool=massif $tbin
|
||||
#ms_print massif.out.<N>
|
||||
|
||||
+2
-1
@@ -417,7 +417,8 @@ bool dump_context_entry(context_entry_t* entry) {
|
||||
index,
|
||||
entry->data.queue_index,
|
||||
nik_name.c_str());
|
||||
if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)",
|
||||
if (record) fprintf(file_handle, ", gpu-id(%u), time(%lu,%lu,%lu,%lu)",
|
||||
HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
record->dispatch,
|
||||
record->begin,
|
||||
record->end,
|
||||
|
||||
Reference in New Issue
Block a user