dispatch data th-id, queue-id, kernel-object

Change-Id: Ic5593603c6587cad17be33618d011a9aefc648ab
This commit is contained in:
Evgeny
2018-10-16 19:33:48 -05:00
parent 13fe0f9f69
commit c05bded17c
11 changed files with 523 additions and 23 deletions
+36
View File
@@ -0,0 +1,36 @@
#!/bin/sh
# Run an application under the ROC profiler with a tool library preloaded.
#
# Usage: run_tool.sh <cmd line>
#
# The package root is derived from the location of this script, which is
# expected to live in <package>/bin. ROCP_TOOL_LIB may be set by the caller to
# select the tool library; otherwise libintercept_test.so is used.

# Absolute path of the directory holding this script
BIN_DIR=$(cd "$(dirname "$0")" && pwd)
# Package root: the script directory with its trailing "/bin" removed.
# The pattern is anchored at the end of the path so that a "/bin" occurring
# earlier in the path (e.g. "/home/binary/pkg/bin") is left untouched.
PKG_DIR=$(printf '%s\n' "$BIN_DIR" | sed 's|/bin/*$||')
BIN_DIR=$PKG_DIR/bin

# PATH to custom HSA libs
HSA_PATH=$PKG_DIR/lib/hsa

if [ -z "$1" ] ; then
  echo "Usage: $0 <cmd line>"
else
  # command line of the application to run
  # (kept as a single string; it is split on whitespace again when executed)
  test_app=$*

  # paths to ROC profiler and other libraries
  # (this replaces any LD_LIBRARY_PATH inherited from the caller)
  export LD_LIBRARY_PATH=$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH
  export PATH=.:$PATH

  # ROC profiler library loaded by HSA runtime
  export HSA_TOOLS_LIB=librocprofiler64.so.1
  # tool library loaded by ROC profiler; a value preset by the caller wins
  if [ -z "$ROCP_TOOL_LIB" ] ; then
    export ROCP_TOOL_LIB=libintercept_test.so
  fi
  # enable error messages
  export HSA_TOOLS_REPORT_LOAD_FAILURE=1
  export HSA_VEN_AMD_AQLPROFILE_LOG=1
  export ROCPROFILER_LOG=1
  # make sure the proxy queue setting is not inherited from the caller
  unset ROCP_PROXY_QUEUE
  # ROC profiler metrics config file.
  # NOTE(review): the original pointed at $BIN_DIR/lib (= <package>/bin/lib),
  # while every other path in this script uses <package>/lib - confirm the
  # installed location of metrics.xml.
  export ROCP_METRICS=$PKG_DIR/lib/metrics.xml

  # run the application with the tool library preloaded
  LD_PRELOAD=$ROCP_TOOL_LIB $test_app
fi
+4 -1
View File
@@ -46,7 +46,7 @@ THE SOFTWARE.
#include <hsa_ven_amd_aqlprofile.h>
#include <stdint.h>
#define ROCPROFILER_VERSION_MAJOR 3
#define ROCPROFILER_VERSION_MAJOR 4
#define ROCPROFILER_VERSION_MINOR 0
#ifdef __cplusplus
@@ -217,8 +217,11 @@ typedef struct {
uint32_t agent_index; // GPU index
const hsa_queue_t* queue; // HSA queue
uint64_t queue_index; // Index in the queue
uint32_t queue_id; // Queue id
const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet
const char* kernel_name; // Kernel name
uint64_t kernel_object; // Kernel object pointer
int64_t thread_id; // Thread id
const rocprofiler_dispatch_record_t* record; // Dispatch record
} rocprofiler_callback_data_t;
+1
View File
@@ -37,5 +37,6 @@ const char* InterceptQueue::kernel_none_ = "";
Tracker* InterceptQueue::tracker_ = NULL;
bool InterceptQueue::tracker_on_ = false;
bool InterceptQueue::in_constr_call_ = false;
InterceptQueue::queue_id_t InterceptQueue::current_queue_id = 0;
} // namespace rocprofiler
+18 -4
View File
@@ -26,6 +26,7 @@ THE SOFTWARE.
#include <amd_hsa_kernel_code.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <sys/syscall.h>
#include <atomic>
#include <iostream>
@@ -49,6 +50,7 @@ class InterceptQueue {
typedef std::map<uint64_t, InterceptQueue*> obj_map_t;
typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data);
typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg);
typedef uint32_t queue_id_t;
static void HsaIntercept(HsaApiTable* table);
@@ -79,6 +81,8 @@ class InterceptQueue {
(*obj_map_)[(uint64_t)(*queue)] = obj;
status = proxy->SetInterceptCB(OnSubmitCB, obj);
obj->queue_event_callback_ = callback;
obj->queue_id = current_queue_id;
++current_queue_id;
in_constr_call_ = false;
return status;
@@ -139,13 +143,17 @@ class InterceptQueue {
}
// Preparing dispatch callback data
const char* kernel_name = GetKernelName(dispatch_packet);
uint64_t kernel_symbol = GetKernelSymbol(dispatch_packet);
const char* kernel_name = GetKernelName(kernel_symbol);
rocprofiler_callback_data_t data = {obj->agent_info_->dev_id,
obj->agent_info_->dev_index,
obj->queue_,
user_que_idx,
obj->queue_id,
dispatch_packet,
kernel_name,
kernel_symbol,
syscall(__NR_gettid),
(tracker_entry) ? tracker_entry->record : NULL};
// Calling dispatch callback
@@ -222,7 +230,7 @@ class InterceptQueue {
return static_cast<hsa_packet_type_t>((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask);
}
static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
static uint64_t GetKernelSymbol(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
const amd_kernel_code_t* kernel_code = NULL;
hsa_status_t status =
util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
@@ -231,8 +239,12 @@ class InterceptQueue {
if (HSA_STATUS_SUCCESS != status) {
kernel_code = reinterpret_cast<amd_kernel_code_t*>(dispatch_packet->kernel_object);
}
amd_runtime_loader_debug_info_t* dbg_info = reinterpret_cast<amd_runtime_loader_debug_info_t*>(
kernel_code->runtime_loader_kernel_symbol);
return kernel_code->runtime_loader_kernel_symbol;
}
static const char* GetKernelName(const uint64_t kernel_symbol) {
amd_runtime_loader_debug_info_t* dbg_info =
reinterpret_cast<amd_runtime_loader_debug_info_t*>(kernel_symbol);
const char* kernel_name = (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
// Kernel name is mangled name
@@ -298,11 +310,13 @@ class InterceptQueue {
static Tracker* tracker_;
static bool tracker_on_;
static bool in_constr_call_;
static queue_id_t current_queue_id;
hsa_queue_t* const queue_;
ProxyQueue* const proxy_;
const util::AgentInfo* agent_info_;
queue_event_callback_t queue_event_callback_;
queue_id_t queue_id;
};
} // namespace rocprofiler
+14 -2
View File
@@ -37,11 +37,18 @@ file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
## Standalone test sources
set ( STEXE_NAME "standalone_test" )
set ( STST_SRC
set ( STTST_SRC
${TEST_DIR}/app/standalone_test.cpp
${TEST_DIR}/ctrl/test_hsa.cpp
)
## Intercept test sources
## INEXE_NAME is the target name and INTST_SRC its source list; both are
## consumed by the add_library() call for the intercept test further down
set ( INEXE_NAME "intercept_test" )
set ( INTST_SRC
${TEST_DIR}/app/intercept_test.cpp
${TEST_DIR}/ctrl/test_hsa.cpp
)
## Test control sources
set ( CTRL_SRC
${TEST_DIR}/app/test.cpp
@@ -58,10 +65,15 @@ set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
## Building standalone test executable
add_executable ( ${STEXE_NAME} ${STST_SRC} ${UTIL_SRC} ${KERN_SRC} )
add_executable ( ${STEXE_NAME} ${STTST_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${STEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${STEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
## Building intercept test shared library
## (despite the INEXE_NAME variable name this target is a SHARED library, not an executable)
add_library ( ${INEXE_NAME} SHARED ${INTST_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${INEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${INEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
## Building ctrl test executable
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
+231
View File
@@ -0,0 +1,231 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include <hsa.h>
#include <string.h>
#include <unistd.h>
#include <dlfcn.h>
#include <iostream>
#include <vector>
#include <atomic>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
#include "ctrl/test_hsa.h"
#include "inc/rocprofiler.h"
#include "dummy_kernel/dummy_kernel.h"
#include "simple_convolution/simple_convolution.h"
#include "util/test_assert.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
// Dispatch callbacks and context handlers synchronization
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
// True while the tool is loaded: set by OnLoadToolProp, cleared by OnUnloadTool
volatile bool is_loaded = false;
// Fatal error reporter.
// Flushes stdout, writes the message followed by an empty line to stderr,
// flushes stderr and terminates the process with abort(). Never returns.
void fatal(const std::string msg) {
  fflush(stdout);
  fputs(msg.c_str(), stderr);
  fputs("\n\n", stderr);
  fflush(stderr);
  abort();
}
// Check a status returned by an HSA / ROC profiler API call.
// Does nothing when 'status' is HSA_STATUS_SUCCESS. Otherwise the error text
// obtained from rocprofiler_error_string() is printed to stderr and the
// process is aborted.
void check_status(hsa_status_t status) {
  if (status == HSA_STATUS_SUCCESS) return;

  const char* error_string = NULL;
  rocprofiler_error_string(&error_string);
  // Passing a NULL pointer for "%s" is undefined behavior, so print a
  // placeholder if no error text was provided
  fprintf(stderr, "ERROR: %s\n", (error_string != NULL) ? error_string : "(no error string)");
  abort();
}
// Context stored entry type
// One entry is allocated per intercepted dispatch in dispatch_callback and
// deleted in context_handler.
struct context_entry_t {
// Set to true (through an atomic store) once all other fields are filled;
// dump_context_entry spins until it reads true
bool valid;
// Agent copied from the dispatch callback data
hsa_agent_t agent;
// Group 0 of the profiling context opened for this dispatch
rocprofiler_group_t group;
// Copy of the dispatch callback data; kernel_name points to a strdup() copy
rocprofiler_callback_data_t data;
};
// Print a stored context entry to stdout and close its profiling context.
// Waits, yielding the CPU, until the entry's 'valid' flag reads true, then
// prints kernel symbol, kernel name, thread id, queue id and GPU index,
// followed by the four dispatch timestamps when a dispatch record is attached.
// Aborts if the stored group has no context.
void dump_context_entry(context_entry_t* entry) {
  // The plain bool flag is read through an atomic view, matching the atomic
  // store done in dispatch_callback
  volatile std::atomic<bool>* ready = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
  while (!ready->load()) sched_yield();

  const rocprofiler_callback_data_t& data = entry->data;
  const std::string kernel_name(data.kernel_name);
  const rocprofiler_dispatch_record_t* record = data.record;

  fflush(stdout);
  fprintf(stdout, "kernel symbol(0x%lx) name(\"%s\") tid(%ld) queue-id(%u) gpu-id(%u) ",
          data.kernel_object,
          kernel_name.c_str(),
          data.thread_id,
          data.queue_id,
          HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index);
  if (record != NULL) {
    fprintf(stdout, "time(%lu,%lu,%lu,%lu)",
            record->dispatch,
            record->begin,
            record->end,
            record->complete);
  }
  fputc('\n', stdout);
  fflush(stdout);

  // A missing context indicates a tool error
  if (entry->group.context == NULL) {
    fprintf(stderr, "tool error: context is NULL\n");
    abort();
  }

  rocprofiler_close(entry->group.context);
}
// Profiling completion handler
// Dumps the context entry passed in 'arg', releases the kernel name copy owned
// by the entry and deletes the entry, all under the global mutex.
// 'group' is not used; the group stored in the entry is used instead.
// Always returns false.
// NOTE(review): the meaning of the returned value is defined by the ROC
// profiler handler contract, which is not visible here - confirm that false
// is the intended value.
bool context_handler(rocprofiler_group_t group, void* arg) {
  context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);

  if (pthread_mutex_lock(&mutex) != 0) {
    perror("pthread_mutex_lock");
    abort();
  }

  dump_context_entry(entry);
  // The kernel name was duplicated with strdup() in dispatch_callback, so it
  // has to be released with free() before the entry itself is deleted
  free(const_cast<char*>(entry->data.kernel_name));
  delete entry;

  if (pthread_mutex_unlock(&mutex) != 0) {
    perror("pthread_mutex_unlock");
    abort();
  }

  return false;
}
// Kernel dispatch callback
// Opens a profiling context for the dispatching agent, returns its group 0
// through 'group' and stores a context entry that is handed to
// context_handler as the handler argument.
//   callback_data - dispatch data provided by the profiler
//   user_data     - unused
//   group         - output, receives group 0 of the opened context
// Returns HSA_STATUS_SUCCESS; any profiler API failure aborts the process
// inside check_status().
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/,
                               rocprofiler_group_t* group) {
  // Context entry, released by context_handler
  context_entry_t* entry = new context_entry_t();

  // Context properties: completion handler and its argument
  rocprofiler_properties_t properties{};
  properties.handler = context_handler;
  properties.handler_arg = (void*)entry;

  // Open profiling context
  rocprofiler_t* context = NULL;
  hsa_status_t status = rocprofiler_open(callback_data->agent, NULL, 0,
                                         &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
  check_status(status);

  // Get group[0]
  status = rocprofiler_get_group(context, 0, group);
  check_status(status);

  // Fill profiling context entry
  entry->agent = callback_data->agent;
  entry->group = *group;
  entry->data = *callback_data;
  // Keep a private copy of the kernel name for the completion handler.
  // strdup(NULL) is undefined behavior, so a missing name is stored as an
  // empty string
  const char* kernel_name = callback_data->kernel_name;
  entry->data.kernel_name = strdup((kernel_name != NULL) ? kernel_name : "");
  if (entry->data.kernel_name == NULL) {
    perror("strdup");
    abort();
  }
  // Publish the entry: dump_context_entry waits for this flag
  reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);

  return HSA_STATUS_SUCCESS;
}
void initialize() {
// Getting GPU device info
const AgentInfo* agent_info = NULL;
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) {
fprintf(stderr, "GetGpuAgentInfo failed\n");
abort();
}
// Adding dispatch observer
rocprofiler_queue_callbacks_t callbacks_ptrs{};
callbacks_ptrs.dispatch = dispatch_callback;
rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL);
}
// Tool cleanup.
// Removes the queue callbacks registered by initialize() and flushes any
// output still buffered on stdout.
void cleanup() {
  rocprofiler_remove_queue_callbacks();

  fflush(stdout);
}
// Tool constructor
// Entry point exported for the ROC profiler. Marks the tool as loaded,
// enables timestamping in 'settings' and runs initialize(). A call made while
// the tool is already loaded does nothing.
extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
{
  if (pthread_mutex_lock(&mutex) != 0) {
    perror("pthread_mutex_lock");
    abort();
  }
  // Sample and update the flag under the lock, but decide after unlocking:
  // returning while the mutex is held would leave it locked
  const bool already_loaded = is_loaded;
  is_loaded = true;
  if (pthread_mutex_unlock(&mutex) != 0) {
    perror("pthread_mutex_unlock");
    abort();
  }
  if (already_loaded) return;

  // Enable timestamping
  settings->timestamp_on = true;

  // Initialize profiling
  initialize();
}
// Tool destructor
// Entry point exported for the ROC profiler and also called from the library
// destructor. Marks the tool as unloaded and runs cleanup(). A call made while
// the tool is not loaded does nothing.
extern "C" PUBLIC_API void OnUnloadTool() {
  if (pthread_mutex_lock(&mutex) != 0) {
    perror("pthread_mutex_lock");
    abort();
  }
  // Sample and update the flag under the lock, but decide after unlocking:
  // returning while the mutex is held would leave it locked
  const bool was_loaded = is_loaded;
  is_loaded = false;
  if (pthread_mutex_unlock(&mutex) != 0) {
    perror("pthread_mutex_unlock");
    abort();
  }
  if (!was_loaded) return;

  // Final resources cleanup
  cleanup();
}
// Library constructor (marked with the GCC 'constructor' attribute):
// prints a marker line to stdout and flushes it.
extern "C" CONSTRUCTOR_API void constructor() {
  fputs("INTT constructor\n", stdout);
  fflush(stdout);
}
// Library destructor (marked with the GCC 'destructor' attribute):
// unloads the tool if it is still marked as loaded.
extern "C" DESTRUCTOR_API void destructor() {
  if (!is_loaded) return;
  OnUnloadTool();
}
+189
View File
@@ -0,0 +1,189 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include <hsa.h>
#include <string.h>
#include <unistd.h>
#include <iostream>
#include <vector>
#include <atomic>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
#include "ctrl/test_hsa.h"
#include "inc/rocprofiler.h"
#include "dummy_kernel/dummy_kernel.h"
#include "simple_convolution/simple_convolution.h"
#include "util/test_assert.h"
// Dispatch callbacks and context handlers synchronization
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
// Fatal error reporter.
// Flushes stdout, writes the message followed by an empty line to stderr,
// flushes stderr and terminates the process with abort(). Never returns.
void fatal(const std::string msg) {
  fflush(stdout);
  fputs(msg.c_str(), stderr);
  fputs("\n\n", stderr);
  fflush(stderr);
  abort();
}
// Check a status returned by an HSA / ROC profiler API call.
// Does nothing when 'status' is HSA_STATUS_SUCCESS. Otherwise the error text
// obtained from rocprofiler_error_string() is printed to stderr and the
// process is aborted.
void check_status(hsa_status_t status) {
  if (status == HSA_STATUS_SUCCESS) return;

  const char* error_string = NULL;
  rocprofiler_error_string(&error_string);
  // Passing a NULL pointer for "%s" is undefined behavior, so print a
  // placeholder if no error text was provided
  fprintf(stderr, "ERROR: %s\n", (error_string != NULL) ? error_string : "(no error string)");
  abort();
}
// Context stored entry type
// One entry is allocated per intercepted dispatch in dispatch_callback and
// deleted in context_handler.
struct context_entry_t {
// Set to true (through an atomic store) once all other fields are filled;
// dump_context_entry spins until it reads true
bool valid;
// Agent copied from the dispatch callback data
hsa_agent_t agent;
// Group 0 of the profiling context opened for this dispatch
rocprofiler_group_t group;
// Copy of the dispatch callback data; kernel_name points to a strdup() copy
rocprofiler_callback_data_t data;
};
// Print a stored context entry to stdout and close its profiling context.
// Waits, yielding the CPU, until the entry's 'valid' flag reads true, then
// prints the kernel symbol and name; GPU index and the four dispatch
// timestamps are appended only when a dispatch record is attached.
// Aborts if the stored group has no context.
void dump_context_entry(context_entry_t* entry) {
  // The plain bool flag is read through an atomic view, matching the atomic
  // store done in dispatch_callback
  volatile std::atomic<bool>* ready = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
  while (!ready->load()) sched_yield();

  const rocprofiler_callback_data_t& data = entry->data;
  const std::string kernel_name(data.kernel_name);
  const rocprofiler_dispatch_record_t* record = data.record;

  fflush(stdout);
  fprintf(stdout, "kernel symbol(0x%lx) name(\"%s\")", data.kernel_object, kernel_name.c_str());
  if (record != NULL) {
    fprintf(stdout, ", gpu-id(%u), time(%lu,%lu,%lu,%lu)",
            HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
            record->dispatch,
            record->begin,
            record->end,
            record->complete);
  }
  fputc('\n', stdout);
  fflush(stdout);

  // A missing context indicates a tool error
  if (entry->group.context == NULL) {
    fprintf(stderr, "tool error: context is NULL\n");
    abort();
  }

  rocprofiler_close(entry->group.context);
}
// Profiling completion handler
// Dumps the context entry passed in 'arg', releases the kernel name copy owned
// by the entry and deletes the entry, all under the global mutex.
// 'group' is not used; the group stored in the entry is used instead.
// Always returns false.
// NOTE(review): the meaning of the returned value is defined by the ROC
// profiler handler contract, which is not visible here - confirm that false
// is the intended value.
bool context_handler(rocprofiler_group_t group, void* arg) {
  context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);

  if (pthread_mutex_lock(&mutex) != 0) {
    perror("pthread_mutex_lock");
    abort();
  }

  dump_context_entry(entry);
  // The kernel name was duplicated with strdup() in dispatch_callback, so it
  // has to be released with free() before the entry itself is deleted
  free(const_cast<char*>(entry->data.kernel_name));
  delete entry;

  if (pthread_mutex_unlock(&mutex) != 0) {
    perror("pthread_mutex_unlock");
    abort();
  }

  return false;
}
// Kernel dispatch callback
// Opens a profiling context for the dispatching agent, returns its group 0
// through 'group' and stores a context entry that is handed to
// context_handler as the handler argument.
//   callback_data - dispatch data provided by the profiler
//   user_data     - unused
//   group         - output, receives group 0 of the opened context
// Returns HSA_STATUS_SUCCESS; any profiler API failure aborts the process
// inside check_status().
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/,
                               rocprofiler_group_t* group) {
  // Context entry, released by context_handler
  context_entry_t* entry = new context_entry_t();

  // Context properties: completion handler and its argument
  rocprofiler_properties_t properties{};
  properties.handler = context_handler;
  properties.handler_arg = (void*)entry;

  // Open profiling context
  rocprofiler_t* context = NULL;
  hsa_status_t status = rocprofiler_open(callback_data->agent, NULL, 0,
                                         &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
  check_status(status);

  // Get group[0]
  status = rocprofiler_get_group(context, 0, group);
  check_status(status);

  // Fill profiling context entry
  entry->agent = callback_data->agent;
  entry->group = *group;
  entry->data = *callback_data;
  // Keep a private copy of the kernel name for the completion handler.
  // strdup(NULL) is undefined behavior, so a missing name is stored as an
  // empty string
  const char* kernel_name = callback_data->kernel_name;
  entry->data.kernel_name = strdup((kernel_name != NULL) ? kernel_name : "");
  if (entry->data.kernel_name == NULL) {
    perror("strdup");
    abort();
  }
  // Publish the entry: dump_context_entry waits for this flag
  reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);

  return HSA_STATUS_SUCCESS;
}
// Standalone test entry point.
// Creates the HSA resources and a queue, registers the dispatch callback and
// runs the DummyKernel and SimpleConvolution tests.
// Environment:
//   ROCP_KITER - number of outer iterations (default 1)
//   ROCP_DITER - count passed to each RunKernel call (default 1)
// Returns 0 when every executed kernel run succeeded, 1 otherwise (including
// the case where no iteration was executed).
int main() {
  bool ret_val = false;
  const char* kiter_s = getenv("ROCP_KITER");
  const char* diter_s = getenv("ROCP_DITER");
  const unsigned kiter = (kiter_s != NULL) ? atol(kiter_s) : 1;
  const unsigned diter = (diter_s != NULL) ? atol(diter_s) : 1;

  // Instantiate HSA resources
  HsaRsrcFactory::Create();

  // Getting GPU device info
  const AgentInfo* agent_info = NULL;
  if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) abort();

  // Creating the queue
  hsa_queue_t* queue = NULL;
  if (HsaRsrcFactory::Instance().CreateQueue(agent_info, 128, &queue) == false) abort();

  // Adding dispatch observer
  rocprofiler_queue_callbacks_t callbacks_ptrs{};
  callbacks_ptrs.dispatch = dispatch_callback;
  rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL);

  // Test initialization
  TestHsa::SetQueue(queue);
  TestHsa::HsaInstantiate(0);

  for (unsigned ind = 0; ind < kiter; ++ind) {
    printf("Iterastion %u:\n", ind);
    ret_val = RunKernel<DummyKernel, TestAql>(0, NULL, diter);
    if (ret_val) ret_val = RunKernel<SimpleConvolution, TestAql>(0, NULL, diter);
    // Stop at the first failure: continuing would let a later successful
    // iteration overwrite ret_val and hide the failure in the exit code
    if (!ret_val) break;
  }

  TestHsa::HsaShutdown();
  return (ret_val) ? 0 : 1;
}
+3
View File
@@ -29,6 +29,9 @@ THE SOFTWARE.
template <class Kernel, class Test> bool RunKernel(int argc = 0, char* argv[] = NULL, int count = 1) {
bool ret_val = false;
if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL);
// Create test kernel object
Kernel test_kernel;
TestAql* test_aql = new TestHsa(&test_kernel);
+12 -10
View File
@@ -50,15 +50,6 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
return NULL;
}
std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl;
// Create an instance of Aql Queue
if (hsa_queue_ == NULL) {
uint32_t num_pkts = 128;
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
hsa_queue_ = NULL;
TEST_ASSERT(false);
}
}
}
return hsa_rsrc_;
}
@@ -74,6 +65,15 @@ void TestHsa::HsaShutdown() {
bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
std::clog << "TestHsa::Initialize :" << std::endl;
// Create an instance of Aql Queue
if (hsa_queue_ == NULL) {
uint32_t num_pkts = 128;
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
hsa_queue_ = NULL;
TEST_ASSERT(false);
}
}
// Instantiate a Timer object
setup_timer_idx_ = hsa_timer_.CreateTimer();
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
@@ -222,7 +222,7 @@ bool TestHsa::Run() {
// Submit AQL packet to the queue
const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql);
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl;
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush;
// Wait on the dispatch signal until the kernel is finished.
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
@@ -283,5 +283,7 @@ void TestHsa::PrintTime() {
// Release the per-test HSA objects: the executable, the signal and the queue.
// The queue pointer is reset to NULL after it is destroyed.
// The statuses returned by the HSA destroy calls are not checked.
// Always returns true.
bool TestHsa::Cleanup() {
hsa_executable_destroy(hsa_exec_);
hsa_signal_destroy(hsa_signal_);
hsa_queue_destroy(hsa_queue_);
// Clear the pointer so that Initialize(), which creates a queue only when
// hsa_queue_ is NULL, does not reuse the destroyed queue
hsa_queue_ = NULL;
return true;
}
+13 -5
View File
@@ -32,9 +32,17 @@ export ROCPROFILER_LOG=1
unset ROCP_PROXY_QUEUE
# ROC profiler metrics config file
export ROCP_METRICS=metrics.xml
# test trace
export ROC_TEST_TRACE=1
# tool library loaded by ROC profiler
export ROCP_TOOL_LIB=./test/libintercept_test.so
../bin/run_tool.sh ./test/ctrl
unset ROCP_TOOL_LIB
eval ./test/standalone_test
# tool library loaded by ROC profiler
export ROCP_TOOL_LIB=libtool.so
# ROC profiler kernels timing
@@ -47,16 +55,16 @@ if [ ! -e $ROCP_TOOL_LIB ] ; then
export ROCP_TOOL_LIB=test/libtool.so
fi
export ROCP_KITER=100
export ROCP_DITER=100
export ROCP_INPUT=input.xml
eval ./test/ctrl
export ROCP_KITER=1
export ROCP_DITER=4
export ROCP_INPUT=input1.xml
eval ./test/ctrl
export ROCP_KITER=100
export ROCP_DITER=100
export ROCP_INPUT=input.xml
eval ./test/ctrl
#valgrind --leak-check=full $tbin
#valgrind --tool=massif $tbin
#ms_print massif.out.<N>
+2 -1
View File
@@ -417,7 +417,8 @@ bool dump_context_entry(context_entry_t* entry) {
index,
entry->data.queue_index,
nik_name.c_str());
if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)",
if (record) fprintf(file_handle, ", gpu-id(%u), time(%lu,%lu,%lu,%lu)",
HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
record->dispatch,
record->begin,
record->end,