diff --git a/projects/rocprofiler/inc/rocprofiler.h b/projects/rocprofiler/inc/rocprofiler.h index 0cb4874b50..c6c58baa77 100644 --- a/projects/rocprofiler/inc/rocprofiler.h +++ b/projects/rocprofiler/inc/rocprofiler.h @@ -224,10 +224,9 @@ typedef struct { // Profiling callback type typedef hsa_status_t (*rocprofiler_callback_t)( - const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on - // the callback API id + const rocprofiler_callback_data_t* callback_data, // [in] callback data void* user_data, // [in/out] user data passed to the callback - rocprofiler_group_t* group); // [out] profiling group + rocprofiler_group_t* group); // [out] returned profiling group // Queue callbacks typedef struct { diff --git a/projects/rocprofiler/src/core/hsa_queue.h b/projects/rocprofiler/src/core/hsa_queue.h index 620f62249b..12ef97bb0c 100644 --- a/projects/rocprofiler/src/core/hsa_queue.h +++ b/projects/rocprofiler/src/core/hsa_queue.h @@ -32,15 +32,6 @@ namespace rocprofiler { class HsaQueue : public Queue { public: - typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet); - enum { - LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t), - LEGACY_SLOT_SIZE_P = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t) - }; - struct slot_pm4_t { - packet_word_t words[LEGACY_SLOT_SIZE_W]; - }; - HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {} void Submit(const packet_t* packet) { diff --git a/projects/rocprofiler/src/core/metrics.h b/projects/rocprofiler/src/core/metrics.h index 8f05a3e74a..46806dcfac 100644 --- a/projects/rocprofiler/src/core/metrics.h +++ b/projects/rocprofiler/src/core/metrics.h @@ -195,7 +195,7 @@ class MetricsDict { } static hsa_ven_amd_aqlprofile_id_query_t Translate(const util::AgentInfo* agent_info, const std::string& block_name) { - hsa_ven_amd_aqlprofile_profile_t profile; + hsa_ven_amd_aqlprofile_profile_t profile{}; profile.agent = agent_info->dev_id; hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0}; hsa_status_t status = diff --git a/projects/rocprofiler/src/core/rocprofiler.cpp b/projects/rocprofiler/src/core/rocprofiler.cpp index e8901387f4..4b7fc9a452 100644 --- a/projects/rocprofiler/src/core/rocprofiler.cpp +++ b/projects/rocprofiler/src/core/rocprofiler.cpp @@ -56,6 +56,16 @@ THE SOFTWARE. // Internal library methods // namespace rocprofiler { +hsa_status_t CreateQueuePro( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue); + decltype(hsa_queue_create)* hsa_queue_create_fn; decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; @@ -115,6 +125,11 @@ void RestoreHsaApi() { table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn; } +void StandaloneIntercept() { + ::HsaApiTable* table = kHsaApiTable; + table->core_->hsa_queue_create_fn = rocprofiler::CreateQueuePro; +} + typedef void (*tool_handler_t)(); typedef void (*tool_handler_prop_t)(rocprofiler_settings_t*); void * tool_handle = NULL; @@ -195,9 +210,7 @@ DESTRUCTOR_API void destructor() { const MetricsDict* GetMetrics(const hsa_agent_t& agent) { rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent); - if (agent_info == NULL) { - EXC_RAISING(HSA_STATUS_ERROR, "agent is not found"); - } + if (agent_info == NULL) EXC_RAISING(HSA_STATUS_ERROR, "agent is not found"); const MetricsDict* metrics = MetricsDict::Create(agent_info); if (metrics == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed"); return metrics; @@ -209,6 +222,94 @@ hsa_status_t GetExcStatus(const std::exception& e) { : HSA_STATUS_ERROR; } + +inline size_t CreateEnableCmd(const hsa_agent_t& agent, packet_t* command, const size_t& slot_count) { + rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); + const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent); + const bool is_legacy = (strncmp(agent_info->name, "gfx8", 4) == 0); + const size_t packet_count = (is_legacy) ? Profile::LEGACY_SLOT_SIZE_PKT : 1; + + if (packet_count > slot_count) EXC_RAISING(HSA_STATUS_ERROR, "packet_count > slot_count"); + + // AQLprofile object + hsa_ven_amd_aqlprofile_profile_t profile{}; + profile.agent = agent_info->dev_id; + // Query for cmd buffer size + hsa_status_t status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, NULL); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).size exc"); + if (profile.command_buffer.size == 0) EXC_RAISING(status, "get_info(ENABLE_CMD).size == 0"); + // Allocate cmd buffer + const size_t aligment_mask = 0x100 - 1; + profile.command_buffer.ptr = + hsa_rsrc->AllocateSysMemory(agent_info, profile.command_buffer.size); + if ((reinterpret_cast(profile.command_buffer.ptr) & aligment_mask) != 0) { + EXC_RAISING(status, "profile.command_buffer.ptr bad alignment"); + } + + // Generating cmd packet + if (is_legacy) { + packet_t packet{}; + + // Query for cmd buffer data + status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, &packet); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc"); + + // Check for legacy GFXIP + status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_legacy_get_pm4(&packet, command); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + } else { + // Query for cmd buffer data + status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, command); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc"); + } + + // Return cmd packet data size + return (packet_count * sizeof(packet_t)); +} + +hsa_status_t CreateQueuePro( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue) +{ + static packet_t enable_cmd_packet[Profile::LEGACY_SLOT_SIZE_PKT]; + static size_t enable_cmd_size = 0; + static std::mutex enable_cmd_mutex; + + // Create HSA queue + hsa_status_t status = hsa_queue_create_fn( + agent, + size, + type, + callback, + data, + private_segment_size, + group_segment_size, + queue); + if (status != HSA_STATUS_SUCCESS) return status; + + // Create 'Enable' cmd packet + if (enable_cmd_size == 0) { + std::lock_guard lck(enable_cmd_mutex); + if (enable_cmd_size == 0) { + enable_cmd_size = CreateEnableCmd(agent, enable_cmd_packet, Profile::LEGACY_SLOT_SIZE_PKT); + } + } + + // Enable counters for the queue + rocprofiler::util::HsaRsrcFactory::Instance().Submit(*queue, enable_cmd_packet, enable_cmd_size); + + return HSA_STATUS_SUCCESS; +} + rocprofiler_properties_t rocprofiler_properties; uint32_t SqttProfile::output_buffer_size_ = 0x2000000; // 32M bool SqttProfile::output_buffer_local_ = true; @@ -261,7 +362,10 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa if (intercept_mode) { rocprofiler::ProxyQueue::HsaIntercept(table); rocprofiler::InterceptQueue::HsaIntercept(table); + } else { + rocprofiler::StandaloneIntercept(); } + return true; } diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp index 3c50d27dc4..b24850eeae 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp @@ -550,7 +550,7 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { } uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { - const uint32_t slot_size_b = 0x40; + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; // adevance command queue const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue); @@ -578,7 +578,7 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { } uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { - const uint32_t slot_size_b = 0x40; + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; if ((size_bytes & (slot_size_b - 1)) != 0) { fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); abort(); diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.h b/projects/rocprofiler/src/util/hsa_rsrc_factory.h index c76046d2e8..d269ceb2fb 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.h +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.h @@ -154,6 +154,7 @@ class HsaTimer { class HsaRsrcFactory { public: + static const size_t CMD_SLOT_SIZE_B = 0x40; typedef std::recursive_mutex mutex_t; typedef HsaTimer::timestamp_t timestamp_t; diff --git a/projects/rocprofiler/test/CMakeLists.txt b/projects/rocprofiler/test/CMakeLists.txt index 2f35639d85..2b12b28a66 100644 --- a/projects/rocprofiler/test/CMakeLists.txt +++ b/projects/rocprofiler/test/CMakeLists.txt @@ -35,19 +35,35 @@ endif () ## Util sources file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" ) +## Standalone test sources +set ( STEXE_NAME "standalone_test" ) +set ( STST_SRC + ${TEST_DIR}/app/standalone_test.cpp + ${TEST_DIR}/ctrl/test_hsa.cpp +) + ## Test control sources set ( CTRL_SRC ${TEST_DIR}/app/test.cpp ${TEST_DIR}/ctrl/test_hsa.cpp ) -## Test kernel sources +## Dummy kernel +set ( DUMMY_NAME dummy_kernel ) +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${DUMMY_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" ) + +## Test kernel set ( TEST_NAME simple_convolution ) set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp ) execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" ) -## Building test executable -add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} ) +## Building standalone test executable +add_executable ( ${STEXE_NAME} ${STST_SRC} ${UTIL_SRC} ${KERN_SRC} ) +target_include_directories ( ${STEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries( ${STEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt ) + +## Building ctrl test executable +add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} ) target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) target_link_libraries( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt ) execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" ) diff --git a/projects/rocprofiler/test/app/standalone_test.cpp b/projects/rocprofiler/test/app/standalone_test.cpp index ac5cfdc72a..28a34ea0d0 100644 --- a/projects/rocprofiler/test/app/standalone_test.cpp +++ b/projects/rocprofiler/test/app/standalone_test.cpp @@ -22,97 +22,25 @@ THE SOFTWARE. #include #include +#include #include #include "ctrl/run_kernel.h" #include "ctrl/test_aql.h" #include "ctrl/test_hsa.h" #include "inc/rocprofiler.h" +#include "dummy_kernel/dummy_kernel.h" #include "simple_convolution/simple_convolution.h" #include "util/test_assert.h" -int main(int argc, char** argv) { - bool ret_val = false; - // HSA status - hsa_status_t status = HSA_STATUS_ERROR; - // Profiling context - rocprofiler_t* context = NULL; - // Profiling properties - rocprofiler_properties_t properties; - // Number of context invocation - uint32_t invocation = 0; - -#if 0 - // Profiling info objects - const unsigned info_count = 1; - rocprofiler_info_t info[info_count]; - // PMC events - memset(info, 0, sizeof(info)); - info[0].type = ROCPROFILER_TYPE_METRIC; - info[0].name = "SQ_WAVES"; -#else - // Profiling info objects - const unsigned info_count = 3; - rocprofiler_info_t info[info_count]; - // PMC events - memset(info, 0, sizeof(info)); - info[0].type = ROCPROFILER_TYPE_METRIC; - info[0].name = "SQ_WAVES"; - info[1].type = ROCPROFILER_TYPE_METRIC; - info[1].name = "SQ_ITEMS"; - // Tracing parameters - const unsigned parameter_count = 2; - rocprofiler_parameter_t parameters[parameter_count]; - info[2].name = "THREAD_TRACE"; - info[2].type = ROCPROFILER_TYPE_TRACE; - info[2].parameters = parameters; - info[2].parameter_count = parameter_count; - parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; - parameters[0].value = 0; - parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; - parameters[1].value = 0; -#endif - - // Creating profiling context - properties = {}; - properties.queue_depth = 128; - status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context, - ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_OWNQUEUE, &properties); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - - TestHsa::SetQueue(properties.queue); - - // Adding dispatch observer - status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - - // Querying the number of context invocation - status = rocprofiler_invocation(context, &invocation); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - - // Dispatching profiled kernel n-times to collect all counter groups data - unsigned n = 0; - while (1) { - std::cout << "> " << n << "/" << invocation << std::endl; -#if 0 - status = rocprofiler_start(context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - ret_val = RunKernel(argc, argv); - status = rocprofiler_stop(context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); -#else - ret_val = RunKernel(argc, argv); -#endif - status = rocprofiler_sample(context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - - for (rocprofiler_info_t* p = info; p < info + info_count; ++p) { - std::cout << (p - info) << ": " << p->name; +void print_features(rocprofiler_feature_t* feature, uint32_t feature_count) { + for (rocprofiler_feature_t* p = feature; p < feature + feature_count; ++p) { + std::cout << (p - feature) << ": " << p->name; switch (p->data.kind) { case ROCPROFILER_DATA_KIND_INT64: - std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl; + std::cout << std::dec << " result64 (" << p->data.result_int64 << ")" << std::endl; break; - case ROCPROFILER_BYTES: { + case ROCPROFILER_DATA_KIND_BYTES: { const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); uint64_t size = 0; for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { @@ -130,15 +58,119 @@ int main(int argc, char** argv) { TEST_ASSERT(false); } } +} - ++n; - if (n < invocation) { - status = rocprofiler_next(context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - continue; - } - break; +void read_features(uint32_t n, rocprofiler_t* context, rocprofiler_feature_t* feature, const unsigned feature_count) { + std::cout << "read features" << std::endl; + hsa_status_t status = rocprofiler_read(context, n); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + std::cout << "read issue" << std::endl; + status = rocprofiler_get_data(context, n); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + status = rocprofiler_get_metrics(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + print_features(feature, feature_count); +} + +int main() { + bool ret_val = false; + // HSA status + hsa_status_t status = HSA_STATUS_ERROR; + // Profiling context + rocprofiler_t* context = NULL; + // Profiling properties + rocprofiler_properties_t properties; + + // Profiling feature objects + const unsigned feature_count = 9; + rocprofiler_feature_t feature[feature_count]; + // PMC events + memset(feature, 0, sizeof(feature)); + feature[0].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[0].name = "GRBM_COUNT"; + feature[1].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[1].name = "GRBM_GUI_ACTIVE"; + feature[2].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[2].name = "GPUBusy"; + feature[3].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[3].name = "SQ_WAVES"; + feature[4].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[4].name = "SQ_INSTS_VALU"; + feature[5].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[5].name = "VALUInsts"; + feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[6].name = "TCC_HIT_sum"; + feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[7].name = "TCC_MISS_sum"; + feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; + feature[8].name = "WRITE_SIZE"; +// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// feature[8].name = "TCC_EA_WRREQ_sum"; +// feature[9].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// feature[9].name = "TCC_EA_WRREQ_64B_sum"; +#if 0 + // Tracing parameters + const unsigned parameter_count = 2; + rocprofiler_parameter_t parameters[parameter_count]; + feature[2].name = "THREAD_TRACE"; + feature[2].kind = ROCPROFILER_FEATURE_KIND_TRACE; + feature[2].parameters = parameters; + feature[2].parameter_count = parameter_count; + parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters[0].value = 0; + parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters[1].value = 0; +#endif + + // Instantiate HSA resources + HsaRsrcFactory::Create(); + + // Getting GPU device info + const AgentInfo* agent_info = NULL; + if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) abort(); + + // Creating the queues pool + const unsigned queue_count = 16; + hsa_queue_t* queue[queue_count]; + for (unsigned queue_ind = 0; queue_ind < queue_count; ++queue_ind) { + if (HsaRsrcFactory::Instance().CreateQueue(agent_info, 128, &queue[queue_ind]) == false) abort(); } + hsa_queue_t* prof_queue = queue[0]; + + // Creating profiling context + properties = {}; + properties.queue = prof_queue; + status = rocprofiler_open(agent_info->dev_id, feature, feature_count, &context, + ROCPROFILER_MODE_STANDALONE, &properties); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + // Test initialization + TestHsa::SetQueue(prof_queue); + TestHsa::HsaInstantiate(0); + + // Dispatching profiled kernel n-times to collect all counter groups data + const unsigned group_n = 0; + status = rocprofiler_start(context, group_n); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + std::cout << "start" << std::endl; + + for (unsigned ind = 0; ind < 3; ++ind) { +#if 1 + const unsigned queue_ind = ind % queue_count; + TestHsa::SetQueue(queue[queue_ind]); + // ret_val = RunKernel(); + ret_val = RunKernel(); + std::cout << "run kernel, queue " << queue_ind << std::endl; +#else + sleep(3); +#endif + read_features(group_n, context, feature, feature_count); + } + + // Stop counters + status = rocprofiler_stop(context, group_n); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + std::cout << "stop" << std::endl; // Finishing cleanup // Deleting profiling context will delete all allocated resources diff --git a/projects/rocprofiler/test/ctrl/run_kernel.h b/projects/rocprofiler/test/ctrl/run_kernel.h index b122664b26..64aa3d3f8e 100644 --- a/projects/rocprofiler/test/ctrl/run_kernel.h +++ b/projects/rocprofiler/test/ctrl/run_kernel.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include "ctrl/test_hsa.h" #include "util/test_assert.h" -template bool RunKernel(int argc, char* argv[], int count = 1) { +template bool RunKernel(int argc = 0, char* argv[] = NULL, int count = 1) { bool ret_val = false; // Create test kernel object diff --git a/projects/rocprofiler/test/ctrl/test_hsa.cpp b/projects/rocprofiler/test/ctrl/test_hsa.cpp index 878618218c..47d0f54e4c 100644 --- a/projects/rocprofiler/test/ctrl/test_hsa.cpp +++ b/projects/rocprofiler/test/ctrl/test_hsa.cpp @@ -71,7 +71,7 @@ void TestHsa::HsaShutdown() { if (hsa_rsrc_) hsa_rsrc_->Destroy(); } -bool TestHsa::Initialize(int arg_cnt, char** arg_list) { +bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) { std::clog << "TestHsa::Initialize :" << std::endl; // Instantiate a Timer object @@ -119,6 +119,8 @@ bool TestHsa::Setup() { mem_map_t& mem_map = test_->GetMemMap(); for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) { mem_descr_t& des = it->second; + if (des.size == 0) continue; + switch (des.id) { case TestKernel::LOCAL_DES_ID: des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size); @@ -245,6 +247,8 @@ bool TestHsa::VerifyResults() { const uint32_t size = test_->GetOutputSize(); bool suc = false; + if (size == 0) return true; + // Copy local kernel output buffers from local memory into host memory if (test_->IsOutputLocal()) { output = hsa_rsrc_->AllocateSysMemory(agent_info_, size); diff --git a/projects/rocprofiler/test/ctrl/test_hsa.h b/projects/rocprofiler/test/ctrl/test_hsa.h index 84080e7728..a7d5398808 100644 --- a/projects/rocprofiler/test/ctrl/test_hsa.h +++ b/projects/rocprofiler/test/ctrl/test_hsa.h @@ -35,6 +35,7 @@ class TestHsa : public TestAql { static HsaRsrcFactory* HsaInstantiate(const uint32_t agent_ind = agent_id_); static void HsaShutdown(); static void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; } + static hsa_agent_t HsaAgent() { return agent_info_->dev_id; } static uint32_t HsaAgentId() { return agent_id_; } // Constructor diff --git a/projects/rocprofiler/test/dummy_kernel/dummy_kernel.cl b/projects/rocprofiler/test/dummy_kernel/dummy_kernel.cl new file mode 100644 index 0000000000..4ab159c865 --- /dev/null +++ b/projects/rocprofiler/test/dummy_kernel/dummy_kernel.cl @@ -0,0 +1,28 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +/** + dummy kernel + */ +__kernel void DummyKernel() { + uint tid = get_global_id(0); +} diff --git a/projects/rocprofiler/test/dummy_kernel/dummy_kernel.h b/projects/rocprofiler/test/dummy_kernel/dummy_kernel.h new file mode 100644 index 0000000000..1b8ce4309a --- /dev/null +++ b/projects/rocprofiler/test/dummy_kernel/dummy_kernel.h @@ -0,0 +1,71 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_ +#define TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_ + +#include +#include + +#include "ctrl/test_kernel.h" + +// Class implements DummyKernel kernel parameters +class DummyKernel : public TestKernel { + public: + // Kernel buffers IDs + enum { KERNARG_BUF_ID, LOCAL_BUF_ID }; + + // Constructor + DummyKernel() : + width_(64), + height_(64) + { + SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0); + SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0); + } + + // Initialize method + void Init() {} + + // Return compute grid size + uint32_t GetGridSize() const { return width_ * height_; } + + // Print output + void PrintOutput(const void* ptr) const {} + + // Return name + std::string Name() const { return std::string("DummyKernel"); } + + private: + // Reference CPU implementation + bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask, + const uint32_t width, const uint32_t height, + const uint32_t maskWidth, const uint32_t maskHeight) { return true; } + + // Width of the Input array + const uint32_t width_; + + // Height of the Input array + const uint32_t height_; +}; + +#endif // TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_ diff --git a/projects/rocprofiler/test/dummy_kernel/gfx8_DummyKernel.hsaco b/projects/rocprofiler/test/dummy_kernel/gfx8_DummyKernel.hsaco new file mode 100755 index 0000000000..35866785c0 Binary files /dev/null and b/projects/rocprofiler/test/dummy_kernel/gfx8_DummyKernel.hsaco differ diff --git a/projects/rocprofiler/test/dummy_kernel/gfx9_DummyKernel.hsaco b/projects/rocprofiler/test/dummy_kernel/gfx9_DummyKernel.hsaco new file mode 100755 index 0000000000..35866785c0 Binary files /dev/null and b/projects/rocprofiler/test/dummy_kernel/gfx9_DummyKernel.hsaco differ diff --git a/projects/rocprofiler/test/run.sh b/projects/rocprofiler/test/run.sh index cc5bd50144..8d8ca8fffd 100755 --- a/projects/rocprofiler/test/run.sh +++ b/projects/rocprofiler/test/run.sh @@ -22,21 +22,21 @@ # THE SOFTWARE. ################################################################################ -test_bin_dflt=./test/ctrl - # paths to ROC profiler and oher libraries export LD_LIBRARY_PATH=$PWD -# enable error messages logging to '/tmp/rocprofiler_log.txt' -export ROCPROFILER_LOG=1 - # ROC profiler library loaded by HSA runtime export HSA_TOOLS_LIB=librocprofiler64.so -# tool library loaded by ROC profiler -export ROCP_TOOL_LIB=libtool.so +# enable error messages logging to '/tmp/rocprofiler_log.txt' +export ROCPROFILER_LOG=1 # ROC profiler metrics config file unset ROCP_PROXY_QUEUE # ROC profiler metrics config file export ROCP_METRICS=metrics.xml + +eval ./test/standalone_test + +# tool library loaded by ROC profiler +export ROCP_TOOL_LIB=libtool.so # ROC profiler kernels timing export ROCP_TIMESTAMP_ON=1 # output directory for the tool library, for metrics results file 'results.txt' @@ -47,21 +47,15 @@ if [ ! -e $ROCP_TOOL_LIB ] ; then export ROCP_TOOL_LIB=test/libtool.so fi -if [ -n "$1" ] ; then - tbin="$*" -else - tbin=$test_bin_dflt -fi - export ROCP_KITER=100 export ROCP_DITER=100 export ROCP_INPUT=input.xml -eval $tbin +eval ./test/ctrl -#export ROCP_KITER=1 -#export ROCP_DITER=4 -#export ROCP_INPUT=input1.xml -#eval $tbin +export ROCP_KITER=1 +export ROCP_DITER=4 +export ROCP_INPUT=input1.xml +eval ./test/ctrl #valgrind --leak-check=full $tbin #valgrind --tool=massif $tbin