Merge "standalone/sampling mode" into amd-master
[ROCm/rocprofiler commit: e56fc0adda]
Этот коммит содержится в:
@@ -224,10 +224,9 @@ typedef struct {
|
||||
|
||||
// Profiling callback type
|
||||
typedef hsa_status_t (*rocprofiler_callback_t)(
|
||||
const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on
|
||||
// the callback API id
|
||||
const rocprofiler_callback_data_t* callback_data, // [in] callback data
|
||||
void* user_data, // [in/out] user data passed to the callback
|
||||
rocprofiler_group_t* group); // [out] profiling group
|
||||
rocprofiler_group_t* group); // [out] returned profiling group
|
||||
|
||||
// Queue callbacks
|
||||
typedef struct {
|
||||
|
||||
@@ -32,15 +32,6 @@ namespace rocprofiler {
|
||||
|
||||
class HsaQueue : public Queue {
|
||||
public:
|
||||
typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet);
|
||||
enum {
|
||||
LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t),
|
||||
LEGACY_SLOT_SIZE_P = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
|
||||
};
|
||||
struct slot_pm4_t {
|
||||
packet_word_t words[LEGACY_SLOT_SIZE_W];
|
||||
};
|
||||
|
||||
HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
|
||||
@@ -195,7 +195,7 @@ class MetricsDict {
|
||||
}
|
||||
|
||||
static hsa_ven_amd_aqlprofile_id_query_t Translate(const util::AgentInfo* agent_info, const std::string& block_name) {
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
hsa_ven_amd_aqlprofile_profile_t profile{};
|
||||
profile.agent = agent_info->dev_id;
|
||||
hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0};
|
||||
hsa_status_t status =
|
||||
|
||||
@@ -56,6 +56,16 @@ THE SOFTWARE.
|
||||
// Internal library methods
|
||||
//
|
||||
namespace rocprofiler {
|
||||
hsa_status_t CreateQueuePro(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue);
|
||||
|
||||
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
|
||||
@@ -115,6 +125,11 @@ void RestoreHsaApi() {
|
||||
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
|
||||
}
|
||||
|
||||
void StandaloneIntercept() {
|
||||
::HsaApiTable* table = kHsaApiTable;
|
||||
table->core_->hsa_queue_create_fn = rocprofiler::CreateQueuePro;
|
||||
}
|
||||
|
||||
typedef void (*tool_handler_t)();
|
||||
typedef void (*tool_handler_prop_t)(rocprofiler_settings_t*);
|
||||
void * tool_handle = NULL;
|
||||
@@ -195,9 +210,7 @@ DESTRUCTOR_API void destructor() {
|
||||
const MetricsDict* GetMetrics(const hsa_agent_t& agent) {
|
||||
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
|
||||
const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
|
||||
if (agent_info == NULL) {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "agent is not found");
|
||||
}
|
||||
if (agent_info == NULL) EXC_RAISING(HSA_STATUS_ERROR, "agent is not found");
|
||||
const MetricsDict* metrics = MetricsDict::Create(agent_info);
|
||||
if (metrics == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed");
|
||||
return metrics;
|
||||
@@ -209,6 +222,94 @@ hsa_status_t GetExcStatus(const std::exception& e) {
|
||||
: HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
|
||||
inline size_t CreateEnableCmd(const hsa_agent_t& agent, packet_t* command, const size_t& slot_count) {
|
||||
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
|
||||
const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
|
||||
const bool is_legacy = (strncmp(agent_info->name, "gfx8", 4) == 0);
|
||||
const size_t packet_count = (is_legacy) ? Profile::LEGACY_SLOT_SIZE_PKT : 1;
|
||||
|
||||
if (packet_count > slot_count) EXC_RAISING(HSA_STATUS_ERROR, "packet_count > slot_count");
|
||||
|
||||
// AQLprofile object
|
||||
hsa_ven_amd_aqlprofile_profile_t profile{};
|
||||
profile.agent = agent_info->dev_id;
|
||||
// Query for cmd buffer size
|
||||
hsa_status_t status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, NULL);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).size exc");
|
||||
if (profile.command_buffer.size == 0) EXC_RAISING(status, "get_info(ENABLE_CMD).size == 0");
|
||||
// Allocate cmd buffer
|
||||
const size_t aligment_mask = 0x100 - 1;
|
||||
profile.command_buffer.ptr =
|
||||
hsa_rsrc->AllocateSysMemory(agent_info, profile.command_buffer.size);
|
||||
if ((reinterpret_cast<uintptr_t>(profile.command_buffer.ptr) & aligment_mask) != 0) {
|
||||
EXC_RAISING(status, "profile.command_buffer.ptr bad alignment");
|
||||
}
|
||||
|
||||
// Generating cmd packet
|
||||
if (is_legacy) {
|
||||
packet_t packet{};
|
||||
|
||||
// Query for cmd buffer data
|
||||
status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, &packet);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc");
|
||||
|
||||
// Check for legacy GFXIP
|
||||
status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_legacy_get_pm4(&packet, command);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
} else {
|
||||
// Query for cmd buffer data
|
||||
status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, command);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc");
|
||||
}
|
||||
|
||||
// Return cmd packet data size
|
||||
return (packet_count * sizeof(packet_t));
|
||||
}
|
||||
|
||||
hsa_status_t CreateQueuePro(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue)
|
||||
{
|
||||
static packet_t enable_cmd_packet[Profile::LEGACY_SLOT_SIZE_PKT];
|
||||
static size_t enable_cmd_size = 0;
|
||||
static std::mutex enable_cmd_mutex;
|
||||
|
||||
// Create HSA queue
|
||||
hsa_status_t status = hsa_queue_create_fn(
|
||||
agent,
|
||||
size,
|
||||
type,
|
||||
callback,
|
||||
data,
|
||||
private_segment_size,
|
||||
group_segment_size,
|
||||
queue);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
|
||||
// Create 'Enable' cmd packet
|
||||
if (enable_cmd_size == 0) {
|
||||
std::lock_guard<std::mutex> lck(enable_cmd_mutex);
|
||||
if (enable_cmd_size == 0) {
|
||||
enable_cmd_size = CreateEnableCmd(agent, enable_cmd_packet, Profile::LEGACY_SLOT_SIZE_PKT);
|
||||
}
|
||||
}
|
||||
|
||||
// Enable counters for the queue
|
||||
rocprofiler::util::HsaRsrcFactory::Instance().Submit(*queue, enable_cmd_packet, enable_cmd_size);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_properties_t rocprofiler_properties;
|
||||
uint32_t SqttProfile::output_buffer_size_ = 0x2000000; // 32M
|
||||
bool SqttProfile::output_buffer_local_ = true;
|
||||
@@ -261,7 +362,10 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
|
||||
if (intercept_mode) {
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
rocprofiler::InterceptQueue::HsaIntercept(table);
|
||||
} else {
|
||||
rocprofiler::StandaloneIntercept();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -550,7 +550,7 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
}
|
||||
|
||||
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
|
||||
const uint32_t slot_size_b = 0x40;
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
|
||||
// adevance command queue
|
||||
const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue);
|
||||
@@ -578,7 +578,7 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
|
||||
}
|
||||
|
||||
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) {
|
||||
const uint32_t slot_size_b = 0x40;
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
if ((size_bytes & (slot_size_b - 1)) != 0) {
|
||||
fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes);
|
||||
abort();
|
||||
|
||||
@@ -154,6 +154,7 @@ class HsaTimer {
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
static const size_t CMD_SLOT_SIZE_B = 0x40;
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef HsaTimer::timestamp_t timestamp_t;
|
||||
|
||||
|
||||
@@ -35,19 +35,35 @@ endif ()
|
||||
## Util sources
|
||||
file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
|
||||
|
||||
## Standalone test sources
|
||||
set ( STEXE_NAME "standalone_test" )
|
||||
set ( STST_SRC
|
||||
${TEST_DIR}/app/standalone_test.cpp
|
||||
${TEST_DIR}/ctrl/test_hsa.cpp
|
||||
)
|
||||
|
||||
## Test control sources
|
||||
set ( CTRL_SRC
|
||||
${TEST_DIR}/app/test.cpp
|
||||
${TEST_DIR}/ctrl/test_hsa.cpp
|
||||
)
|
||||
|
||||
## Test kernel sources
|
||||
## Dummy kernel
|
||||
set ( DUMMY_NAME dummy_kernel )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${DUMMY_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
## Test kernel
|
||||
set ( TEST_NAME simple_convolution )
|
||||
set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
## Building test executable
|
||||
add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} )
|
||||
## Building standalone test executable
|
||||
add_executable ( ${STEXE_NAME} ${STST_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
target_include_directories ( ${STEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${STEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
|
||||
|
||||
## Building ctrl test executable
|
||||
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
@@ -22,97 +22,25 @@ THE SOFTWARE.
|
||||
|
||||
#include <hsa.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "dummy_kernel/dummy_kernel.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool ret_val = false;
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
// Profiling properties
|
||||
rocprofiler_properties_t properties;
|
||||
// Number of context invocation
|
||||
uint32_t invocation = 0;
|
||||
|
||||
#if 0
|
||||
// Profiling info objects
|
||||
const unsigned info_count = 1;
|
||||
rocprofiler_info_t info[info_count];
|
||||
// PMC events
|
||||
memset(info, 0, sizeof(info));
|
||||
info[0].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[0].name = "SQ_WAVES";
|
||||
#else
|
||||
// Profiling info objects
|
||||
const unsigned info_count = 3;
|
||||
rocprofiler_info_t info[info_count];
|
||||
// PMC events
|
||||
memset(info, 0, sizeof(info));
|
||||
info[0].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[0].name = "SQ_WAVES";
|
||||
info[1].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[1].name = "SQ_ITEMS";
|
||||
// Tracing parameters
|
||||
const unsigned parameter_count = 2;
|
||||
rocprofiler_parameter_t parameters[parameter_count];
|
||||
info[2].name = "THREAD_TRACE";
|
||||
info[2].type = ROCPROFILER_TYPE_TRACE;
|
||||
info[2].parameters = parameters;
|
||||
info[2].parameter_count = parameter_count;
|
||||
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
|
||||
parameters[0].value = 0;
|
||||
parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
|
||||
parameters[1].value = 0;
|
||||
#endif
|
||||
|
||||
// Creating profiling context
|
||||
properties = {};
|
||||
properties.queue_depth = 128;
|
||||
status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context,
|
||||
ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_OWNQUEUE, &properties);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
TestHsa::SetQueue(properties.queue);
|
||||
|
||||
// Adding dispatch observer
|
||||
status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Querying the number of context invocation
|
||||
status = rocprofiler_invocation(context, &invocation);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Dispatching profiled kernel n-times to collect all counter groups data
|
||||
unsigned n = 0;
|
||||
while (1) {
|
||||
std::cout << "> " << n << "/" << invocation << std::endl;
|
||||
#if 0
|
||||
status = rocprofiler_start(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
|
||||
status = rocprofiler_stop(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
#else
|
||||
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
|
||||
#endif
|
||||
status = rocprofiler_sample(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
for (rocprofiler_info_t* p = info; p < info + info_count; ++p) {
|
||||
std::cout << (p - info) << ": " << p->name;
|
||||
void print_features(rocprofiler_feature_t* feature, uint32_t feature_count) {
|
||||
for (rocprofiler_feature_t* p = feature; p < feature + feature_count; ++p) {
|
||||
std::cout << (p - feature) << ": " << p->name;
|
||||
switch (p->data.kind) {
|
||||
case ROCPROFILER_DATA_KIND_INT64:
|
||||
std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl;
|
||||
std::cout << std::dec << " result64 (" << p->data.result_int64 << ")" << std::endl;
|
||||
break;
|
||||
case ROCPROFILER_BYTES: {
|
||||
case ROCPROFILER_DATA_KIND_BYTES: {
|
||||
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
|
||||
uint64_t size = 0;
|
||||
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
|
||||
@@ -130,15 +58,119 @@ int main(int argc, char** argv) {
|
||||
TEST_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
++n;
|
||||
if (n < invocation) {
|
||||
status = rocprofiler_next(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
void read_features(uint32_t n, rocprofiler_t* context, rocprofiler_feature_t* feature, const unsigned feature_count) {
|
||||
std::cout << "read features" << std::endl;
|
||||
hsa_status_t status = rocprofiler_read(context, n);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
std::cout << "read issue" << std::endl;
|
||||
status = rocprofiler_get_data(context, n);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
status = rocprofiler_get_metrics(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
print_features(feature, feature_count);
|
||||
}
|
||||
|
||||
int main() {
|
||||
bool ret_val = false;
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
// Profiling properties
|
||||
rocprofiler_properties_t properties;
|
||||
|
||||
// Profiling feature objects
|
||||
const unsigned feature_count = 9;
|
||||
rocprofiler_feature_t feature[feature_count];
|
||||
// PMC events
|
||||
memset(feature, 0, sizeof(feature));
|
||||
feature[0].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[0].name = "GRBM_COUNT";
|
||||
feature[1].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[1].name = "GRBM_GUI_ACTIVE";
|
||||
feature[2].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[2].name = "GPUBusy";
|
||||
feature[3].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[3].name = "SQ_WAVES";
|
||||
feature[4].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[4].name = "SQ_INSTS_VALU";
|
||||
feature[5].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[5].name = "VALUInsts";
|
||||
feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[6].name = "TCC_HIT_sum";
|
||||
feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[7].name = "TCC_MISS_sum";
|
||||
feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[8].name = "WRITE_SIZE";
|
||||
// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// feature[8].name = "TCC_EA_WRREQ_sum";
|
||||
// feature[9].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// feature[9].name = "TCC_EA_WRREQ_64B_sum";
|
||||
#if 0
|
||||
// Tracing parameters
|
||||
const unsigned parameter_count = 2;
|
||||
rocprofiler_parameter_t parameters[parameter_count];
|
||||
feature[2].name = "THREAD_TRACE";
|
||||
feature[2].kind = ROCPROFILER_FEATURE_KIND_TRACE;
|
||||
feature[2].parameters = parameters;
|
||||
feature[2].parameter_count = parameter_count;
|
||||
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
|
||||
parameters[0].value = 0;
|
||||
parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
|
||||
parameters[1].value = 0;
|
||||
#endif
|
||||
|
||||
// Instantiate HSA resources
|
||||
HsaRsrcFactory::Create();
|
||||
|
||||
// Getting GPU device info
|
||||
const AgentInfo* agent_info = NULL;
|
||||
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) abort();
|
||||
|
||||
// Creating the queues pool
|
||||
const unsigned queue_count = 16;
|
||||
hsa_queue_t* queue[queue_count];
|
||||
for (unsigned queue_ind = 0; queue_ind < queue_count; ++queue_ind) {
|
||||
if (HsaRsrcFactory::Instance().CreateQueue(agent_info, 128, &queue[queue_ind]) == false) abort();
|
||||
}
|
||||
hsa_queue_t* prof_queue = queue[0];
|
||||
|
||||
// Creating profiling context
|
||||
properties = {};
|
||||
properties.queue = prof_queue;
|
||||
status = rocprofiler_open(agent_info->dev_id, feature, feature_count, &context,
|
||||
ROCPROFILER_MODE_STANDALONE, &properties);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Test initialization
|
||||
TestHsa::SetQueue(prof_queue);
|
||||
TestHsa::HsaInstantiate(0);
|
||||
|
||||
// Dispatching profiled kernel n-times to collect all counter groups data
|
||||
const unsigned group_n = 0;
|
||||
status = rocprofiler_start(context, group_n);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
std::cout << "start" << std::endl;
|
||||
|
||||
for (unsigned ind = 0; ind < 3; ++ind) {
|
||||
#if 1
|
||||
const unsigned queue_ind = ind % queue_count;
|
||||
TestHsa::SetQueue(queue[queue_ind]);
|
||||
// ret_val = RunKernel<DummyKernel, TestAql>();
|
||||
ret_val = RunKernel<SimpleConvolution, TestAql>();
|
||||
std::cout << "run kernel, queue " << queue_ind << std::endl;
|
||||
#else
|
||||
sleep(3);
|
||||
#endif
|
||||
read_features(group_n, context, feature, feature_count);
|
||||
}
|
||||
|
||||
// Stop counters
|
||||
status = rocprofiler_stop(context, group_n);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
std::cout << "stop" << std::endl;
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
|
||||
@@ -26,7 +26,7 @@ THE SOFTWARE.
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
template <class Kernel, class Test> bool RunKernel(int argc, char* argv[], int count = 1) {
|
||||
template <class Kernel, class Test> bool RunKernel(int argc = 0, char* argv[] = NULL, int count = 1) {
|
||||
bool ret_val = false;
|
||||
|
||||
// Create test kernel object
|
||||
|
||||
@@ -71,7 +71,7 @@ void TestHsa::HsaShutdown() {
|
||||
if (hsa_rsrc_) hsa_rsrc_->Destroy();
|
||||
}
|
||||
|
||||
bool TestHsa::Initialize(int arg_cnt, char** arg_list) {
|
||||
bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
|
||||
std::clog << "TestHsa::Initialize :" << std::endl;
|
||||
|
||||
// Instantiate a Timer object
|
||||
@@ -119,6 +119,8 @@ bool TestHsa::Setup() {
|
||||
mem_map_t& mem_map = test_->GetMemMap();
|
||||
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
|
||||
mem_descr_t& des = it->second;
|
||||
if (des.size == 0) continue;
|
||||
|
||||
switch (des.id) {
|
||||
case TestKernel::LOCAL_DES_ID:
|
||||
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
|
||||
@@ -245,6 +247,8 @@ bool TestHsa::VerifyResults() {
|
||||
const uint32_t size = test_->GetOutputSize();
|
||||
bool suc = false;
|
||||
|
||||
if (size == 0) return true;
|
||||
|
||||
// Copy local kernel output buffers from local memory into host memory
|
||||
if (test_->IsOutputLocal()) {
|
||||
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
|
||||
|
||||
@@ -35,6 +35,7 @@ class TestHsa : public TestAql {
|
||||
static HsaRsrcFactory* HsaInstantiate(const uint32_t agent_ind = agent_id_);
|
||||
static void HsaShutdown();
|
||||
static void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; }
|
||||
static hsa_agent_t HsaAgent() { return agent_info_->dev_id; }
|
||||
static uint32_t HsaAgentId() { return agent_id_; }
|
||||
|
||||
// Constructor
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
/**
|
||||
dummy kernel
|
||||
*/
|
||||
__kernel void DummyKernel() {
|
||||
uint tid = get_global_id(0);
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
|
||||
#define TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/test_kernel.h"
|
||||
|
||||
// Class implements DummyKernel kernel parameters
|
||||
class DummyKernel : public TestKernel {
|
||||
public:
|
||||
// Kernel buffers IDs
|
||||
enum { KERNARG_BUF_ID, LOCAL_BUF_ID };
|
||||
|
||||
// Constructor
|
||||
DummyKernel() :
|
||||
width_(64),
|
||||
height_(64)
|
||||
{
|
||||
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0);
|
||||
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0);
|
||||
}
|
||||
|
||||
// Initialize method
|
||||
void Init() {}
|
||||
|
||||
// Return compute grid size
|
||||
uint32_t GetGridSize() const { return width_ * height_; }
|
||||
|
||||
// Print output
|
||||
void PrintOutput(const void* ptr) const {}
|
||||
|
||||
// Return name
|
||||
std::string Name() const { return std::string("DummyKernel"); }
|
||||
|
||||
private:
|
||||
// Reference CPU implementation
|
||||
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
|
||||
const uint32_t width, const uint32_t height,
|
||||
const uint32_t maskWidth, const uint32_t maskHeight) { return true; }
|
||||
|
||||
// Width of the Input array
|
||||
const uint32_t width_;
|
||||
|
||||
// Height of the Input array
|
||||
const uint32_t height_;
|
||||
};
|
||||
|
||||
#endif // TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
|
||||
Исполняемый файл
Двоичные данные
Двоичный файл не отображается.
Исполняемый файл
Двоичные данные
Двоичный файл не отображается.
@@ -22,21 +22,21 @@
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
test_bin_dflt=./test/ctrl
|
||||
|
||||
# paths to ROC profiler and oher libraries
|
||||
export LD_LIBRARY_PATH=$PWD
|
||||
# enable error messages logging to '/tmp/rocprofiler_log.txt'
|
||||
export ROCPROFILER_LOG=1
|
||||
|
||||
# ROC profiler library loaded by HSA runtime
|
||||
export HSA_TOOLS_LIB=librocprofiler64.so
|
||||
# tool library loaded by ROC profiler
|
||||
export ROCP_TOOL_LIB=libtool.so
|
||||
# enable error messages logging to '/tmp/rocprofiler_log.txt'
|
||||
export ROCPROFILER_LOG=1
|
||||
# ROC profiler metrics config file
|
||||
unset ROCP_PROXY_QUEUE
|
||||
# ROC profiler metrics config file
|
||||
export ROCP_METRICS=metrics.xml
|
||||
|
||||
eval ./test/standalone_test
|
||||
|
||||
# tool library loaded by ROC profiler
|
||||
export ROCP_TOOL_LIB=libtool.so
|
||||
# ROC profiler kernels timing
|
||||
export ROCP_TIMESTAMP_ON=1
|
||||
# output directory for the tool library, for metrics results file 'results.txt'
|
||||
@@ -47,21 +47,15 @@ if [ ! -e $ROCP_TOOL_LIB ] ; then
|
||||
export ROCP_TOOL_LIB=test/libtool.so
|
||||
fi
|
||||
|
||||
if [ -n "$1" ] ; then
|
||||
tbin="$*"
|
||||
else
|
||||
tbin=$test_bin_dflt
|
||||
fi
|
||||
|
||||
export ROCP_KITER=100
|
||||
export ROCP_DITER=100
|
||||
export ROCP_INPUT=input.xml
|
||||
eval $tbin
|
||||
eval ./test/ctrl
|
||||
|
||||
#export ROCP_KITER=1
|
||||
#export ROCP_DITER=4
|
||||
#export ROCP_INPUT=input1.xml
|
||||
#eval $tbin
|
||||
export ROCP_KITER=1
|
||||
export ROCP_DITER=4
|
||||
export ROCP_INPUT=input1.xml
|
||||
eval ./test/ctrl
|
||||
|
||||
#valgrind --leak-check=full $tbin
|
||||
#valgrind --tool=massif $tbin
|
||||
|
||||
Ссылка в новой задаче
Block a user