// MIT License // // Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
// Overview of this translation unit
// ---------------------------------
// All definitions between the ROCPROFSYS_USE_ROCM guard and its #else are compiled only
// when that macro is defined and > 0. Otherwise config_settings() is an empty stub.
//
// ROCPROFILER_CALL(result)
//   Evaluates `result` once. If the status is not ROCPROFILER_STATUS_SUCCESS it builds a
//   message from the expression text, __FILE__, __LINE__, the status code and
//   rocprofiler_get_status_string(), then passes it to ROCPROFSYS_WARNING(0, ...).
//   The macro itself contains no return/abort statement.
//
// get_setting_name(name)
//   Lower-cases `name` and, if the result starts with "rocprofsys_", drops that prefix.
//
// ROCPROFSYS_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...)
//   Immediately-invoked lambda that needs a `_config` in scope. Calls _config->insert()
//   with the categories "custom", "rocprofsys", "librocprof-sys" plus the variadic
//   extras, prints "Warning! Duplicate setting" when insert() reports no insertion, and
//   evaluates to _config->find(ENV_NAME)->second.
//
// to_lower(value)
//   Returns a lower-cased std::string built from `value`.
//
// operation_options / callback_operation_option_names / buffered_operation_option_names
//   operation_options holds three setting names (include, exclude, annotate-backtrace).
//   The two file-local maps are filled by config_settings(), keyed by `_domain.value`,
//   and queried with .count(kindv) / .at(kindv) by get_operations() and
//   get_backtrace_operations().
//
// get_operations_impl(kind, optname)   [callback and buffer overloads]
//   optname empty: returns every operation id of `kind` whose name pointer is non-null
//   and whose name is not "none".
//   optname set: reads that setting (aborts with "no setting %s" when the lookup yields
//   nothing), returns an empty set for an empty value, otherwise splits the value on
//   " ,;:\n\t" and treats each token as a case-insensitive std::regex. An operation is
//   selected when std::regex_search finds the pattern in its name. Each match is printed
//   via ROCPROFSYS_PRINT_F.
//   NOTE(review): the std::regex is constructed inside the inner loop although it depends
//   only on the outer token; it looks hoistable -- confirm before changing.
//
// get_operations_impl(complete, include, exclude)
//   Starts from `include` (or `complete` when `include` is empty), erases every entry of
//   `exclude`, and returns the remainder as a sorted vector.
//
// config_settings(_config)
//   Registers ROCPROFSYS_ROCM_DOMAINS with its choices list: "hip_api", "hsa_api",
//   "marker_api" plus the lower-cased buffer/callback tracing domain names, minus the
//   skip list "none", "correlation_id_retirement", "marker_core_api",
//   "marker_control_api", "marker_name_api", "code_object". Registers
//   ROCPROFSYS_ROCM_EVENTS. Then adds "kernel_dispatch", "page_migration" and
//   "scratch_memory" to the skip list. For every remaining domain that has at least one
//   operation it registers ROCPROFSYS_ROCM_<DOMAIN>_OPERATIONS, ..._OPERATIONS_EXCLUDE and
//   ..._OPERATIONS_ANNOTATE_BACKTRACE. Each option name is registered once, guarded by the
//   function-local static `_option_names`. The marker core API domain is registered under
//   the name "MARKER_API".
//   NOTE(review): the two consecutive _domain_choices.reserve() calls each pass one
//   container's size; presumably the combined size was intended -- confirm.
//
// get_callback_domains() / get_buffered_domains()
//   Split ROCPROFSYS_ROCM_DOMAINS on " ,;:\t\n" and call ROCPROFSYS_THROW for any token
//   that is not among the setting's choices. "hsa_api", "hip_api" and
//   "marker_api"/"roctx" expand to fixed groups of tracing kinds. Any other token is
//   compared against the lower-cased tracing domain names and accepted only if the kind
//   is in the local `supported` set.
//   NOTE(review): "roctx" is tested after the choices validation, so it is reachable only
//   if "roctx" is itself one of the choices -- verify.
//   NOTE(review): in get_buffered_domains() the grouped aliases insert HSA/HIP/MARKER
//   buffer kinds without consulting `supported` -- confirm this is intended.
//
// get_rocm_events()
//   Splits ROCPROFSYS_ROCM_EVENTS on " ,;\t\n". ':' is not a delimiter here, presumably so
//   that ':device=N' suffixes stay attached to the event name.
//
// get_operations(kind) / get_backtrace_operations(kind)
//   Abort when `kind` has no entry in the corresponding option-name map. get_operations()
//   combines the complete/include/exclude sets through get_operations_impl().
//   get_backtrace_operations() returns the ids matched by the annotate-backtrace setting.
//   NOTE(review): the abort text in the callback overloads says
//   "callback_operation_operation_names" while the map is named
//   callback_operation_option_names -- looks like a typo in the message.
#include "core/rocprofiler-sdk.hpp" #include "core/config.hpp" #include "core/debug.hpp" #include "timemory.hpp" #include #if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 # include # include # include # include # include # include # include # include # include # include # include # include # define ROCPROFILER_CALL(result) \ { \ rocprofiler_status_t CHECKSTATUS = (result); \ if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ { \ auto msg = std::stringstream{}; \ std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ msg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " \ << "rocprofiler-sdk call [" << #result \ << "] failed with error code " << CHECKSTATUS \ << " :: " << status_msg; \ ROCPROFSYS_WARNING(0, "%s\n", msg.str().c_str()); \ } \ } namespace rocprofsys { namespace rocprofiler_sdk { namespace { std::string get_setting_name(std::string _v) { constexpr auto _prefix = tim::string_view_t{ "rocprofsys_" }; for(auto& itr : _v) itr = tolower(itr); auto _pos = _v.find(_prefix); if(_pos == 0) return _v.substr(_prefix.length()); return _v; } # define ROCPROFSYS_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \ [&]() { \ auto _ret = _config->insert( \ ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, \ TYPE{ INITIAL_VALUE }, \ std::set{ "custom", "rocprofsys", "librocprof-sys", \ __VA_ARGS__ }); \ if(!_ret.second) \ { \ ROCPROFSYS_PRINT("Warning! 
Duplicate setting: %s / %s\n", \ get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ } \ return _config->find(ENV_NAME)->second; \ }() template std::string to_lower(const Tp& _val) { auto _v = std::string{ _val }; for(auto& itr : _v) itr = ::tolower(itr); return _v; } struct operation_options { std::string operations_include = {}; std::string operations_exclude = {}; std::string operations_annotate_backtrace = {}; }; auto callback_operation_option_names = std::unordered_map{}; auto buffered_operation_option_names = std::unordered_map{}; std::unordered_set get_operations_impl(rocprofiler_callback_tracing_kind_t kindv, const std::string& optname = {}) { static const auto callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); if(optname.empty()) { auto _ret = std::unordered_set{}; for(auto iitr : callback_tracing_info[kindv].items()) { if(iitr.second && *iitr.second != "none") _ret.emplace(iitr.first); } return _ret; } auto _val = get_setting_value(optname); ROCPROFSYS_CONDITIONAL_ABORT_F(!_val, "no setting %s\n", optname.c_str()); if(_val->empty()) return std::unordered_set{}; auto _ret = std::unordered_set{}; for(const auto& itr : tim::delimit(*_val, " ,;:\n\t")) { for(auto iitr : callback_tracing_info[kindv].items()) { auto _re = std::regex{ itr, std::regex_constants::icase }; if(iitr.second && std::regex_search(iitr.second->data(), _re)) { ROCPROFSYS_PRINT_F("%s ('%s') matched: %s\n", optname.c_str(), itr.c_str(), iitr.second->data()); _ret.emplace(iitr.first); } } } return _ret; } std::unordered_set get_operations_impl(rocprofiler_buffer_tracing_kind_t kindv, const std::string& optname = {}) { static const auto buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); if(optname.empty()) { auto _ret = std::unordered_set{}; for(auto iitr : buffered_tracing_info[kindv].items()) { if(iitr.second && *iitr.second != "none") _ret.emplace(iitr.first); } return _ret; } auto _val = get_setting_value(optname); ROCPROFSYS_CONDITIONAL_ABORT_F(!_val, 
"no setting %s\n", optname.c_str()); if(_val->empty()) return std::unordered_set{}; auto _ret = std::unordered_set{}; for(const auto& itr : tim::delimit(*_val, " ,;:\n\t")) { for(auto iitr : buffered_tracing_info[kindv].items()) { auto _re = std::regex{ itr, std::regex_constants::icase }; if(iitr.second && std::regex_search(iitr.second->data(), _re)) { ROCPROFSYS_PRINT_F("%s ('%s') matched: %s\n", optname.c_str(), itr.c_str(), iitr.second->data()); _ret.emplace(iitr.first); } } } return _ret; } std::vector get_operations_impl(const std::unordered_set& _complete, const std::unordered_set& _include, const std::unordered_set& _exclude) { auto _convert = [](const auto& _dset) { auto _dret = std::vector{}; _dret.reserve(_dset.size()); for(auto itr : _dset) _dret.emplace_back(itr); std::sort(_dret.begin(), _dret.end()); return _dret; }; if(_include.empty() && _exclude.empty()) return _convert(_complete); auto _ret = (_include.empty()) ? _complete : _include; for(auto itr : _exclude) _ret.erase(itr); return _convert(_ret); } } // namespace void config_settings(const std::shared_ptr& _config) { // const auto agents = std::vector{}; const auto buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); const auto callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); auto _skip_domains = std::unordered_set{ "none", "correlation_id_retirement", "marker_core_api", "marker_control_api", "marker_name_api", "code_object" }; auto _domain_choices = std::vector{}; auto _add_domain = [&_domain_choices, &_skip_domains](std::string_view _domain) { auto _v = to_lower(_domain); if(_skip_domains.count(_v) == 0) { auto itr = std::find(_domain_choices.begin(), _domain_choices.end(), _v); if(itr == _domain_choices.end()) _domain_choices.emplace_back(_v); } }; static auto _option_names = std::unordered_set{}; auto _add_operation_settings = [&_config, &_skip_domains]( std::string_view _domain_name, const auto& _domain, auto& _operation_option_names) { auto _v = 
to_lower(_domain_name); if(_skip_domains.count(_v) > 0) return; auto _op_option_name = JOIN('_', "ROCPROFSYS_ROCM", _domain_name, "OPERATIONS"); auto _eop_option_name = JOIN('_', "ROCPROFSYS_ROCM", _domain_name, "OPERATIONS_EXCLUDE"); auto _bt_option_name = JOIN('_', "ROCPROFSYS_ROCM", _domain_name, "OPERATIONS_ANNOTATE_BACKTRACE"); auto _op_choices = std::vector{}; for(auto itr : _domain.operations) _op_choices.emplace_back(std::string{ itr }); if(_op_choices.empty()) return; _operation_option_names.emplace( _domain.value, operation_options{ _op_option_name, _eop_option_name, _bt_option_name }); if(_option_names.emplace(_op_option_name).second) { ROCPROFSYS_CONFIG_SETTING( std::string, _op_option_name.c_str(), "Inclusive filter for domain operations (for API domains, this selects " "the functions to trace) [regex supported]", std::string{}, "rocm", "rocprofiler-sdk", "advanced") ->set_choices(_op_choices); } if(_option_names.emplace(_eop_option_name).second) { ROCPROFSYS_CONFIG_SETTING( std::string, _eop_option_name.c_str(), "Exclusive filter for domain operations applied after the inclusive " "filter (for API domains, removes function from trace) [regex supported]", std::string{}, "rocm", "rocprofiler-sdk", "advanced") ->set_choices(_op_choices); } if(_option_names.emplace(_bt_option_name).second) { ROCPROFSYS_CONFIG_SETTING( std::string, _bt_option_name.c_str(), "Specification of domain operations which will record a backtrace (for " "API domains, this is a list of function names) [regex supported]", std::string{}, "rocm", "rocprofiler-sdk", "advanced") ->set_choices(_op_choices); } }; _domain_choices.reserve(buffered_tracing_info.size()); _domain_choices.reserve(callback_tracing_info.size()); _add_domain("hip_api"); _add_domain("hsa_api"); _add_domain("marker_api"); for(const auto& itr : buffered_tracing_info) _add_domain(itr.name); for(const auto& itr : callback_tracing_info) _add_domain(itr.name); std::sort(_domain_choices.begin(), _domain_choices.end()); 
namespace join = ::timemory::join; auto _domain_description = JOIN("", "Specification of ROCm domains to trace/profile. Choices: ", join::join(join::array_config{ ", ", "", "" }, _domain_choices)); ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_DOMAINS", _domain_description, std::string{ "hip_runtime_api,marker_api,kernel_dispatch," "memory_copy,scratch_memory,page_migration" }, "rocm", "rocprofiler-sdk") ->set_choices(_domain_choices); ROCPROFSYS_CONFIG_SETTING( std::string, "ROCPROFSYS_ROCM_EVENTS", "ROCm hardware counters. Use ':device=N' syntax to specify collection on device " "number N, e.g. ':device=0'. If no device specification is provided, the event " "is collected on every available device", "", "rocm", "hardware_counters"); _skip_domains.emplace("kernel_dispatch"); _skip_domains.emplace("page_migration"); _skip_domains.emplace("scratch_memory"); _add_operation_settings( "MARKER_API", callback_tracing_info[ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API], callback_operation_option_names); for(const auto& itr : callback_tracing_info) _add_operation_settings(itr.name, itr, callback_operation_option_names); for(const auto& itr : buffered_tracing_info) _add_operation_settings(itr.name, itr, buffered_operation_option_names); } std::unordered_set get_callback_domains() { const auto callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); const auto supported = std::unordered_set { ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API, ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API, ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, # if(ROCPROFILER_VERSION_MAJOR == 0 && ROCPROFILER_VERSION_MINOR >= 7) || \ ROCPROFILER_VERSION_MAJOR >= 1 ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API, # endif 
}; auto _data = std::unordered_set{}; auto _domains = tim::delimit(config::get_setting_value("ROCPROFSYS_ROCM_DOMAINS") .value_or(std::string{}), " ,;:\t\n"); const auto valid_choices = settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS")->get_choices(); auto invalid_domain = [&valid_choices](const auto& domainv) { return !std::any_of(valid_choices.begin(), valid_choices.end(), [&domainv](const auto& aitr) { return (aitr == domainv); }); }; for(const auto& itr : _domains) { if(invalid_domain(itr)) { ROCPROFSYS_THROW("unsupported ROCPROFSYS_ROCM_DOMAINS value: %s\n", itr.c_str()); } if(itr == "hsa_api") { for(auto eitr : { ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API }) _data.emplace(eitr); } else if(itr == "hip_api") { for(auto eitr : { ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API, ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API }) _data.emplace(eitr); } else if(itr == "marker_api" || itr == "roctx") { _data.emplace(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API); } else { for(size_t idx = 0; idx < callback_tracing_info.size(); ++idx) { auto ditr = callback_tracing_info[idx]; auto dval = static_cast(idx); if(itr == to_lower(ditr.name) && supported.count(dval) > 0) { _data.emplace(dval); break; } } } } return _data; } std::unordered_set get_buffered_domains() { const auto buffer_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); const auto supported = std::unordered_set{ ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, }; auto _data = std::unordered_set{}; auto _domains = tim::delimit(config::get_setting_value("ROCPROFSYS_ROCM_DOMAINS") .value_or(std::string{}), " ,;:\t\n"); const auto valid_choices = settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS")->get_choices(); auto invalid_domain = 
[&valid_choices](const auto& domainv) { return !std::any_of(valid_choices.begin(), valid_choices.end(), [&domainv](const auto& aitr) { return (aitr == domainv); }); }; for(const auto& itr : _domains) { if(invalid_domain(itr)) { ROCPROFSYS_THROW("unsupported ROCPROFSYS_ROCM_DOMAINS value: %s\n", itr.c_str()); } if(itr == "hsa_api") { for(auto eitr : { ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API, ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API, ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API }) _data.emplace(eitr); } else if(itr == "hip_api") { for(auto eitr : { ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API }) _data.emplace(eitr); } else if(itr == "marker_api" || itr == "roctx") { _data.emplace(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API); } else { for(size_t idx = 0; idx < buffer_tracing_info.size(); ++idx) { auto ditr = buffer_tracing_info[idx]; auto dval = static_cast(idx); if(itr == to_lower(ditr.name) && supported.count(dval) > 0) { _data.emplace(dval); break; } } } } return _data; } std::vector get_rocm_events() { return tim::delimit( get_setting_value("ROCPROFSYS_ROCM_EVENTS").value_or(std::string{}), " ,;\t\n"); } std::vector get_operations(rocprofiler_callback_tracing_kind_t kindv) { ROCPROFSYS_CONDITIONAL_ABORT_F( callback_operation_option_names.count(kindv) == 0, "callback_operation_operation_names does not have value for %i\n", kindv); auto _complete = get_operations_impl(kindv); auto _include = get_operations_impl( kindv, callback_operation_option_names.at(kindv).operations_include); auto _exclude = get_operations_impl( kindv, callback_operation_option_names.at(kindv).operations_exclude); return get_operations_impl(_complete, _include, _exclude); } std::vector get_operations(rocprofiler_buffer_tracing_kind_t kindv) { ROCPROFSYS_CONDITIONAL_ABORT_F( buffered_operation_option_names.count(kindv) == 0, "buffered_operation_option_names does not have value for %i\n", kindv); auto 
_complete = get_operations_impl(kindv); auto _include = get_operations_impl( kindv, buffered_operation_option_names.at(kindv).operations_include); auto _exclude = get_operations_impl( kindv, buffered_operation_option_names.at(kindv).operations_exclude); return get_operations_impl(_complete, _include, _exclude); } std::unordered_set get_backtrace_operations(rocprofiler_callback_tracing_kind_t kindv) { ROCPROFSYS_CONDITIONAL_ABORT_F( callback_operation_option_names.count(kindv) == 0, "callback_operation_operation_names does not have value for %i\n", kindv); auto _data = get_operations_impl( kindv, callback_operation_option_names.at(kindv).operations_annotate_backtrace); auto _ret = std::unordered_set{}; _ret.reserve(_data.size()); for(auto itr : _data) _ret.emplace(itr); return _ret; } std::unordered_set get_backtrace_operations(rocprofiler_buffer_tracing_kind_t kindv) { ROCPROFSYS_CONDITIONAL_ABORT_F( buffered_operation_option_names.count(kindv) == 0, "buffered_operation_option_names does not have value for %i\n", kindv); auto _data = get_operations_impl( kindv, buffered_operation_option_names.at(kindv).operations_annotate_backtrace); auto _ret = std::unordered_set{}; _ret.reserve(_data.size()); for(auto itr : _data) _ret.emplace(itr); return _ret; } } // namespace rocprofiler_sdk } // namespace rocprofsys #else namespace rocprofsys { namespace rocprofiler_sdk { void config_settings(const std::shared_ptr&) {} } // namespace rocprofiler_sdk } // namespace rocprofsys #endif