diff --git a/Readme.txt b/Readme.txt index cc4a772d22..e9d044b0ec 100644 --- a/Readme.txt +++ b/Readme.txt @@ -42,7 +42,6 @@ $ cd .../rocprofiler/build $ export LD_LIBRARY_PATH=.: # paths to ROC profiler and oher libraries $ export HSA_TOOLS_LIB=librocprofiler64.so # ROC profiler library loaded by HSA runtime $ export ROCP_TOOL_LIB=test/libtool.so # tool library loaded by ROC profiler -$ export ROCP_HSA_INTERCEPT=1 # enable HSA dispatch intercepting by ROC profiler $ export ROCP_METRICS=metrics.xml # ROC profiler metrics config file $ export ROCP_INPUT=input.xml # input file for the tool library $ export ROCP_OUTPUT_DIR=./ # output directory for the tool library, for metrics results file 'results.txt' and SQTT trace files 'thread_trace.se.out' diff --git a/cmake_modules/env.cmake b/cmake_modules/env.cmake index b7f96420dd..cc9aba62b9 100644 --- a/cmake_modules/env.cmake +++ b/cmake_modules/env.cmake @@ -42,16 +42,6 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} ) add_definitions ( -DDEBUG_TRACE=1 ) endif() -## Enable HSA APIs intersepting -if ( NOT "$ENV{CMAKE_HSA_INTERCEPT}" STREQUAL "no" ) - add_definitions ( -DROCP_HSA_INTERCEPT=1 ) -endif() - -## Enable HSA proxy queue -if ( NOT "$ENV{CMAKE_HSA_PROXY}" STREQUAL "no" ) - add_definitions ( -DROCP_HSA_PROXY=1 ) -endif() - ## Enable direct loading of AQL-profile HSA extension if ( DEFINED ENV{CMAKE_LD_AQLPROFILE} ) add_definitions ( -DROCP_LD_AQLPROFILE=1 ) diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h index 85061b9dc1..07dc842d90 100644 --- a/inc/rocprofiler.h +++ b/inc/rocprofiler.h @@ -79,6 +79,16 @@ extern "C" { uint32_t rocprofiler_version_major(); uint32_t rocprofiler_version_minor(); +//////////////////////////////////////////////////////////////////////////////// +// Global properties structure + +typedef struct { + uint32_t intercept_mode; + uint32_t sqtt_size; + uint64_t timeout; + uint32_t timestamp_on; +} rocprofiler_settings_t; + //////////////////////////////////////////////////////////////////////////////// // Returning the error string method diff --git a/src/core/context.h b/src/core/context.h index 2ee1d896f6..a7a101bbb7 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -375,9 +375,8 @@ class Context { } } - static void SetTimeout(uint64_t timeout) { - timeout_ = timeout; - } + static void SetTimeout(uint64_t timeout) { timeout_ = timeout; } + static uint64_t GetTimeout() { return timeout_; } private: // Getting profling packets diff --git a/src/core/intercept_queue.h b/src/core/intercept_queue.h index 2c8853e571..5b546614d3 100644 --- a/src/core/intercept_queue.h +++ b/src/core/intercept_queue.h @@ -144,6 +144,7 @@ class InterceptQueue { static void SetTimeout(uint64_t timeout) { timeout_ = timeout; } static void TrackerOn(bool on) { tracker_on_ = on; } + static bool IsTrackerOn() { return tracker_on_; } private: InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : diff --git a/src/core/profile.h b/src/core/profile.h index 1bc37a9ef4..71ad2289b7 100644 --- a/src/core/profile.h +++ b/src/core/profile.h @@ -212,7 +212,7 @@ class PmcProfile : public Profile { class SqttProfile : public Profile { public: static inline void SetSize(const uint32_t& size) { output_buffer_size_ = size; } -// static inline uint32_t GetSize() { return output_buffer_size_; } + static inline uint32_t GetSize() { return output_buffer_size_; } SqttProfile(const util::AgentInfo* agent_info) : Profile(agent_info) { profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT; diff --git a/src/core/proxy_queue.cpp b/src/core/proxy_queue.cpp index 59ab280bd1..4083b5cc9f 100644 --- a/src/core/proxy_queue.cpp +++ b/src/core/proxy_queue.cpp @@ -1,8 +1,6 @@ #include "core/proxy_queue.h" -#ifdef ROCP_HSA_PROXY #include "core/hsa_proxy_queue.h" -#endif #include "core/simple_proxy_queue.h" namespace rocprofiler { @@ -17,12 +15,8 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3 uint32_t group_segment_size, hsa_queue_t** queue, hsa_status_t* status) { hsa_status_t suc = HSA_STATUS_ERROR; -#ifdef ROCP_HSA_PROXY ProxyQueue* instance = (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue(); -#else - ProxyQueue* instance = new SimpleProxyQueue(); -#endif if (instance != NULL) { suc = instance->Init(agent, size, type, callback, data, private_segment_size, group_segment_size, queue); diff --git a/src/core/proxy_queue.h b/src/core/proxy_queue.h index 12580245cf..67e2a671f6 100644 --- a/src/core/proxy_queue.h +++ b/src/core/proxy_queue.h @@ -20,14 +20,10 @@ typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_ class ProxyQueue : public Queue { public: static void InitFactory() { -#ifdef ROCP_HSA_PROXY const char* type = getenv("ROCP_PROXY_QUEUE"); if (type != NULL) { if (strncmp(type, "rocp", 4) == 0) rocp_type_ = true; } -#else - rocp_type_ = true; -#endif } static void HsaIntercept(HsaApiTable* table); diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index 0d527c1bf5..61ebcf57e6 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -48,10 +48,8 @@ decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacq decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn; decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn; -#ifdef ROCP_HSA_PROXY decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn; decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn; -#endif ::HsaApiTable* kHsaApiTable; @@ -71,10 +69,8 @@ void SaveHsaApi(::HsaApiTable* table) { hsa_queue_store_write_index_screlease_fn = table->core_->hsa_queue_store_write_index_screlease_fn; hsa_queue_load_read_index_scacquire_fn = table->core_->hsa_queue_load_read_index_scacquire_fn; -#ifdef ROCP_HSA_PROXY hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn; hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn; -#endif } void RestoreHsaApi() { @@ -93,37 +89,58 @@ void RestoreHsaApi() { table->core_->hsa_queue_store_write_index_screlease_fn = hsa_queue_store_write_index_screlease_fn; table->core_->hsa_queue_load_read_index_scacquire_fn = hsa_queue_load_read_index_scacquire_fn; -#ifdef ROCP_HSA_PROXY table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn; table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn; -#endif } typedef void (*tool_handler_t)(); +typedef void (*tool_handler_prop_t)(rocprofiler_settings_t*); void * kTtoolHandle = NULL; -void LoadTool(const char* tool_lib) { +bool LoadTool() { + bool intercept_mode = false; + const char* tool_lib = getenv("ROCP_TOOL_LIB"); + if (tool_lib) { + intercept_mode = true; + kTtoolHandle = dlopen(tool_lib, RTLD_NOW); if (kTtoolHandle == NULL) { fprintf(stderr, "ROCProfiler: can't load tool library \"%s\"\n", tool_lib); fprintf(stderr, "%s\n", dlerror()); - exit(1); + abort(); } tool_handler_t handler = reinterpret_cast(dlsym(kTtoolHandle, "OnLoadTool")); - if (handler == NULL) { - fprintf(stderr, "ROCProfiler: tool library corrupted, OnLoadTool() method is expected\n"); + tool_handler_prop_t handler_prop = reinterpret_cast(dlsym(kTtoolHandle, "OnLoadToolProp")); + if ((handler == NULL) && (handler_prop == NULL)) { + fprintf(stderr, "ROCProfiler: tool library corrupted, OnLoadTool()/OnLoadToolProp() method is expected\n"); fprintf(stderr, "%s\n", dlerror()); - exit(1); + abort(); } tool_handler_t on_unload_handler = reinterpret_cast(dlsym(kTtoolHandle, "OnUnloadTool")); if (on_unload_handler == NULL) { fprintf(stderr, "ROCProfiler: tool library corrupted, OnUnloadTool() method is expected\n"); fprintf(stderr, "%s\n", dlerror()); - exit(1); + abort(); } - handler(); + + rocprofiler_settings_t settings{}; + settings.intercept_mode = (intercept_mode) ? 1 : 0; + settings.sqtt_size = SqttProfile::GetSize(); + settings.timeout = Context::GetTimeout(); + settings.timestamp_on = InterceptQueue::IsTrackerOn() ? 1 : 0; + + if (handler) handler(); + else if (handler_prop) handler_prop(&settings); + + intercept_mode = (settings.intercept_mode != 0); + SqttProfile::SetSize(settings.sqtt_size); + Context::SetTimeout(settings.timeout); + InterceptQueue::SetTimeout(settings.timeout); + InterceptQueue::TrackerOn(settings.timestamp_on != 0); } + + return intercept_mode; } void UnloadTool() { @@ -132,7 +149,7 @@ void UnloadTool() { if (handler == NULL) { fprintf(stderr, "ROCProfiler error: tool library corrupted, OnUnloadTool() method is expected\n"); fprintf(stderr, "%s\n", dlerror()); - exit(1); + abort(); } handler(); dlclose(kTtoolHandle); @@ -141,25 +158,6 @@ void UnloadTool() { CONSTRUCTOR_API void constructor() { util::Logger::Create(); - - const char* sqtt_size_str = getenv("ROCP_SQTT_SIZE"); - if (sqtt_size_str != NULL) { - const uint32_t sqtt_size_val = strtoull(sqtt_size_str, NULL, 0); - SqttProfile::SetSize(sqtt_size_val); - } - - const char* timeout_str = getenv("ROCP_DATA_TIMEOUT"); - if (timeout_str != NULL) { - const uint64_t timeout_val = strtoull(timeout_str, NULL, 0); - Context::SetTimeout(timeout_val); - InterceptQueue::SetTimeout(timeout_val); - } - - const char* tracker_on_str = getenv("ROCP_TRACKER_ON"); - if (tracker_on_str != NULL) { - if (strncmp(tracker_on_str, "true", 4) == 0) InterceptQueue::TrackerOn(true); - if (strncmp(tracker_on_str, "false", 4) == 0) InterceptQueue::TrackerOn(false); - } } DESTRUCTOR_API void destructor() { @@ -185,6 +183,7 @@ const MetricsDict* GetMetrics(const hsa_agent_t& agent) { return metrics; } +rocprofiler_properties_t rocprofiler_properties; uint64_t Context::timeout_ = UINT64_MAX; uint32_t SqttProfile::output_buffer_size_ = 0x2000000; // 32M Tracker::mutex_t Tracker::mutex_; @@ -200,15 +199,14 @@ extern "C" { // HSA-runtime tool on-load method PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { - const bool intercept_mode = (getenv("ROCP_HSA_INTERCEPT") != NULL); rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); + const bool intercept_mode = rocprofiler::LoadTool(); // HSA intercepting if (intercept_mode) { - rocprofiler::InterceptQueue::HsaIntercept(table); rocprofiler::ProxyQueue::HsaIntercept(table); + rocprofiler::InterceptQueue::HsaIntercept(table); } - rocprofiler::LoadTool(getenv("ROCP_TOOL_LIB")); return true; } diff --git a/src/core/simple_proxy_queue.h b/src/core/simple_proxy_queue.h index c1c18862bd..9642652508 100644 --- a/src/core/simple_proxy_queue.h +++ b/src/core/simple_proxy_queue.h @@ -140,8 +140,8 @@ class SimpleProxyQueue : public ProxyQueue { queue_index_(0), queue_mask_(0), submit_index_(0), - on_submit_cb_(0), - on_submit_cb_data_(0) + on_submit_cb_(NULL), + on_submit_cb_data_(NULL) { printf("ROCProfiler: SimpleProxyQueue is enabled\n"); fflush(stdout); diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index 4e68a0ce65..009f50d5a6 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -86,13 +86,6 @@ std::vector* kernel_string_vec = NULL; // DIspatch number range filter std::vector* range_vec = NULL; // Otstanding dispatches parameters -#if 0 -static uint32_t CTX_OUTSTANDING_MAX_DFLT = 10000; -static uint32_t CTX_OUTSTANDING_WAIT_DFLT = 1000; -static uint32_t CTX_OUTSTANDING_WAIT_MAX = 1000000; -static uint32_t CTX_OUTSTANDING_MAX = CTX_OUTSTANDING_MAX_DFLT; -static uint32_t CTX_OUTSTANDING_WAIT = CTX_OUTSTANDING_WAIT_DFLT; -#endif static uint32_t CTX_OUTSTANDING_MAX = 0; static uint32_t CTX_OUTSTANDING_MON = 0; // to truncate kernel names @@ -162,6 +155,8 @@ void* monitor_thr_fun(void*) { abort(); } const uint32_t inflight = context_count - context_collected; + std::cerr << std::flush; + std::clog << std::flush; std::cout << "ROCProfiler: count(" << context_count << "), outstanding(" << inflight << "/" << CTX_OUTSTANDING_MAX << ")" << std::endl << std::flush; if (pthread_mutex_unlock(&mutex) != 0) { perror("pthread_mutex_unlock"); @@ -186,21 +181,6 @@ uint32_t next_context_count() { // Allocate entry to store profiling context context_entry_t* alloc_context_entry() { -#if 0 - uint32_t context_inflight = context_count - context_collected; - if (context_inflight > CTX_OUTSTANDING_MAX) { - if (trace_on) std::cout << "inflight " << context_inflight << " tid " << GetTid() << " <" << CTX_OUTSTANDING_WAIT << "usec, max " << CTX_OUTSTANDING_MAX << ">" << std::flush; - usleep(CTX_OUTSTANDING_WAIT); - if (CTX_OUTSTANDING_WAIT > CTX_OUTSTANDING_WAIT_MAX) { - CTX_OUTSTANDING_MAX = 1 + (CTX_OUTSTANDING_MAX >> 1); - } else { - CTX_OUTSTANDING_WAIT = CTX_OUTSTANDING_WAIT << 1; - } - } else { - CTX_OUTSTANDING_MAX = CTX_OUTSTANDING_MAX_DFLT; - CTX_OUTSTANDING_WAIT = CTX_OUTSTANDING_WAIT_DFLT; - } -#endif if (CTX_OUTSTANDING_MAX != 0) { while((context_count - context_collected) > CTX_OUTSTANDING_MAX) usleep(1000); } @@ -598,6 +578,27 @@ static hsa_status_t info_callback(const rocprofiler_info_data_t info, void * arg return HSA_STATUS_SUCCESS; } +std::string normalize_token(const std::string token, bool not_empty, std::string label) { + const std::string space_chars_set = " \t"; + const size_t first_pos = token.find_first_not_of(space_chars_set); + size_t norm_len = 0; + std::string error_str = "none"; + if (first_pos != std::string::npos) { + const size_t last_pos = token.find_last_not_of(space_chars_set); + if (last_pos == std::string::npos) error_str = "token string error: \"" + token + "\""; + else { + const size_t end_pos = last_pos + 1; + if (end_pos <= first_pos) error_str = "token string error: \"" + token + "\""; + else norm_len = end_pos - first_pos; + } + } + if (((first_pos != std::string::npos) && (norm_len == 0)) || + ((first_pos == std::string::npos) && not_empty)) { + fatal(label + ": " + error_str); + } + return (norm_len != 0) ? token.substr(first_pos, norm_len) : std::string(""); +} + int get_xml_array(xml::Xml* xml, const std::string& tag, const std::string& field, const std::string& delim, std::vector* vec, const char* label = NULL) { int parse_iter = 0; auto nodes = xml->GetNodes(tag); @@ -615,33 +616,12 @@ int get_xml_array(xml::Xml* xml, const std::string& tag, const std::string& fiel const size_t string_len = array_string.length(); while (pos1 < string_len) { const size_t pos2 = array_string.find(delim, pos1); + const bool found = (pos2 != std::string::npos); const size_t token_len = (pos2 != std::string::npos) ? pos2 - pos1 : string_len - pos1; const std::string token = array_string.substr(pos1, token_len); - - const std::string space_chars_set = " \t"; - const size_t first_pos = token.find_first_not_of(space_chars_set); - size_t norm_len = 0; - std::string error_str = "none"; - if (first_pos != std::string::npos) { - const size_t last_pos = token.find_last_not_of(space_chars_set); - if (last_pos == std::string::npos) error_str = "token string error: \"" + token + "\""; - else { - const size_t end_pos = last_pos + 1; - if (end_pos <= first_pos) error_str = "token string error: \"" + token + "\""; - else norm_len = end_pos - first_pos; - } - } - - if (norm_len != 0) { - vec->push_back(token.substr(first_pos, norm_len)); - } - - if (((first_pos != std::string::npos) && (norm_len == 0)) || - ((first_pos == std::string::npos) && (pos2 != std::string::npos))) { - fatal("Tokens array parsing error, file '" + xml->GetName() + "', " + tag + "::" + field + ": " + error_str); - } - - if (pos2 == std::string::npos) break; + const std::string norm_str = normalize_token(token, found, "Tokens array parsing error, file '" + xml->GetName() + "', " + tag + "::" + field); + if (norm_str.length() != 0) vec->push_back(norm_str); + if (!found) break; pos1 = pos2 + 1; ++parse_iter; } @@ -667,7 +647,7 @@ static inline void check_env_var(const char* var_name, uint64_t& val) { } // Tool constructor -extern "C" PUBLIC_API void OnLoadTool() +extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) { if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); @@ -680,33 +660,61 @@ extern "C" PUBLIC_API void OnLoadTool() abort(); } - xml::Xml* rcfile = xml::Xml::Create(std::string("./") + rcfile_name); + // Loading configuration rcfile + std::string rcpath = std::string("./") + rcfile_name; + xml::Xml* rcfile = xml::Xml::Create(rcpath); const char* home_dir = getenv("HOME"); if (rcfile == NULL && home_dir != NULL) { - rcfile = xml::Xml::Create(std::string(home_dir) + "/" + rcfile_name); + rcpath = std::string(home_dir) + "/" + rcfile_name; + rcfile = xml::Xml::Create(rcpath); + } + const char* pkg_dir = getenv("ROCP_PACKAGE_DIR"); + if (rcfile == NULL && pkg_dir != NULL) { + rcpath = std::string(pkg_dir) + "/" + rcfile_name; + rcfile = xml::Xml::Create(rcpath); } if (rcfile != NULL) { // Getting defaults + printf("ROCProfiler: rc-file '%s'\n", rcpath.c_str()); auto defaults_list = rcfile->GetNodes("top.defaults"); for (auto* entry : defaults_list) { - for (const auto& opt : entry->opts) { - std::cout << "default: " << opt.first << " = " << opt.second << std::endl; + const auto& opts = entry->opts; + auto it = opts.find("basenames"); + if (it != opts.end()) { to_truncate_names = (it->second == "on") ? 1 : 0; } + it = opts.find("timestamp"); + if (it != opts.end()) { settings->timestamp_on = (it->second == "on") ? 1 : 0; } + it = opts.find("ctx-limit"); + if (it != opts.end()) { CTX_OUTSTANDING_MAX = atol(it->second.c_str()); } + it = opts.find("heartbeat"); + if (it != opts.end()) { CTX_OUTSTANDING_MON = atol(it->second.c_str()); } + it = opts.find("sqtt-size"); + if (it != opts.end()) { + std::string str = normalize_token(it->second, true, "option sqtt-size"); + uint32_t multiplier = 1; + switch (str.back()) { + case 'K': multiplier = 1024; break; + case 'M': multiplier = 1024 * 1024; break; + } + if (multiplier != 1) str = str.substr(0, str.length() - 1); + settings->sqtt_size = strtoull(str.c_str(), NULL, 0) * multiplier; } } } + // Enable verbose mode + check_env_var("ROCP_VERBOSE_MODE", verbose); + // Enable kernel names truncating + check_env_var("ROCP_TRUNCATE_NAMES", to_truncate_names); + // Set outstanding dispatches parameter + check_env_var("ROCP_OUTSTANDING_MAX", CTX_OUTSTANDING_MAX); + check_env_var("ROCP_OUTSTANDING_MON", CTX_OUTSTANDING_MON); + // Enable timestamping + check_env_var("ROCP_TIMESTAMP_ON", settings->timestamp_on); + // Set data timeout + check_env_var("ROCP_DATA_TIMEOUT", settings->timeout); + // Set SQTT size + check_env_var("ROCP_SQTT_SIZE", settings->sqtt_size); - std::map parameters_dict; - parameters_dict["TARGET_CU"] = - HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; - parameters_dict["VM_ID_MASK"] = - HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; - parameters_dict["MASK"] = - HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; - parameters_dict["TOKEN_MASK"] = - HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; - parameters_dict["TOKEN_MASK2"] = - HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; - + // Printing out info char* info_symb = getenv("ROCP_INFO"); if (info_symb != NULL) { if (*info_symb != 'b' && *info_symb != 'd') { @@ -718,28 +726,6 @@ extern "C" PUBLIC_API void OnLoadTool() } exit(1); } - // Enable verbose mode - check_env_var("ROCP_VERBOSE_MODE", verbose); - // Enable kernel names truncating - check_env_var("ROCP_TRUNCATE_NAMES", to_truncate_names); - // Set outstanding dispatches parameter - check_env_var("ROCP_OUTSTANDING_MAX", CTX_OUTSTANDING_MAX); - check_env_var("ROCP_OUTSTANDING_MON", CTX_OUTSTANDING_MON); -#if 0 - // Set outstanding dispatches parameter - const char* dispatches_max = getenv("ROCP_OUTSTANDING_MAX"); - const char* dispatches_wait = getenv("ROCP_OUTSTANDING_WAIT"); - const char* dispatches_wait_max = getenv("ROCP_OUTSTANDING_WAIT_MAX"); - if (dispatches_max != NULL ) { - if (dispatches_wait == NULL) fatal("ROCP_OUTSTANDING_WAIT should be defined together with ROCP_OUTSTANDING_MAX env var"); - if (dispatches_wait_max == NULL) fatal("ROCP_OUTSTANDING_WAIT_MAX should be defined together with ROCP_OUTSTANDING_MAX env var"); - CTX_OUTSTANDING_MAX_DFLT = atol(dispatches_max); - CTX_OUTSTANDING_WAIT_DFLT = atol(dispatches_wait); - CTX_OUTSTANDING_WAIT_MAX = atol(dispatches_wait_max); - CTX_OUTSTANDING_MAX = CTX_OUTSTANDING_MAX_DFLT; - CTX_OUTSTANDING_WAIT = CTX_OUTSTANDING_WAIT_DFLT; - } -#endif // Set output file result_prefix = getenv("ROCP_OUTPUT_DIR"); @@ -760,8 +746,7 @@ extern "C" PUBLIC_API void OnLoadTool() perror(errmsg.str().c_str()); abort(); } - } else - result_file_handle = stdout; + } else result_file_handle = stdout; result_file_opened = (result_prefix != NULL) && (result_file_handle != NULL); @@ -837,6 +822,18 @@ extern "C" PUBLIC_API void OnLoadTool() features[index].name = strdup(name.c_str()); features[index].data.result_bytes.copy = to_copy_data; + std::map parameters_dict; + parameters_dict["TARGET_CU"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; + parameters_dict["VM_ID_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; + parameters_dict["MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters_dict["TOKEN_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters_dict["TOKEN_MASK2"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; + for (auto* params : params_list) { const unsigned parameter_count = params->opts.size(); rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count]; diff --git a/test/run.sh b/test/run.sh index d03338c5a4..eb016fed80 100755 --- a/test/run.sh +++ b/test/run.sh @@ -11,8 +11,6 @@ export ROCPROFILER_LOG=1 export HSA_TOOLS_LIB=librocprofiler64.so # tool library loaded by ROC profiler export ROCP_TOOL_LIB=libtool.so -# enable HSA dispatch intercepting by ROC profiler -export ROCP_HSA_INTERCEPT=1 # ROC profiler metrics config file unset ROCP_PROXY_QUEUE # ROC profiler metrics config file