rocprofiler_iterate_info workaround + omnitrace-avail update (#270)
* rocprofiler_iterate_info workaround + omnitrace-avail update
- provides workaround for rocprofiler_iterate_info behavior change in ROCm 5.4.0-3
- update timemory submodule with argparse tweaks
- updates hsa_rsrc_factory.{hpp,cpp}
- colorized log in omnitrace-avail
- Bump version to 1.9.2
* Fix empty_base inheritance
- timemory's component::empty_base inherits from concepts::component so direct inheritance was removed
* Fix OMNITRACE_HIP_VERSION_COMPAT_STRING
- defined as "" when OMNITRACE_HIP_VERSION_MAJOR==0
* new defines + extra info
- define OMNITRACE_LIBRARY_ARCH (via CMAKE_LIBRARY_ARCHITECTURE)
- define OMNITRACE_SYSTEM_NAME (via CMAKE_SYSTEM_NAME)
- define OMNITRACE_SYSTEM_PROCESSOR (via CMAKE_SYSTEM_PROCESSOR)
- define OMNITRACE_SYSTEM_VERSION (via CMAKE_SYSTEM_VERSION)
- define OMNITRACE_COMPILER_ID (via CMAKE_CXX_COMPILER_ID)
- define OMNITRACE_COMPILER_VERSION (via CMAKE_CXX_COMPILER_VERSION)
- include this info in metadata
- include subset of this info in --version for bin tools
- tweak to perfetto verbose messages
[ROCm/rocprofiler-systems commit: 4ed5f3e67b]
Tento commit je obsažen v:
@@ -1 +1 @@
|
||||
1.9.1
|
||||
1.9.2
|
||||
|
||||
vendorováno
+1
-1
Submodul projects/rocprofiler-systems/external/timemory aktualizován: 1ab76c36ef...2b92a966d7
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "avail.hpp"
|
||||
#include "common.hpp"
|
||||
#include "common/defines.h"
|
||||
#include "component_categories.hpp"
|
||||
#include "defines.hpp"
|
||||
#include "enumerated_list.hpp"
|
||||
@@ -108,7 +109,11 @@ void
|
||||
write_hw_counter_info(std::ostream&, const array_t<bool, N>& = {},
|
||||
const array_t<bool, N>& = {}, const array_t<string_t, N>& = {});
|
||||
|
||||
int gpu_count = 0;
|
||||
namespace
|
||||
{
|
||||
// initialize HIP before main so that libomnitrace is not HSA_TOOLS_LIB
|
||||
int gpu_count = omnitrace::gpu::hip_device_count();
|
||||
} // namespace
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
@@ -174,11 +179,23 @@ main(int argc, char** argv)
|
||||
|
||||
parser_t parser("omnitrace-avail");
|
||||
|
||||
parser.enable_help();
|
||||
parser.enable_version("omnitrace-avail", "v" OMNITRACE_VERSION_STRING,
|
||||
OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION);
|
||||
|
||||
parser.set_help_width(40);
|
||||
auto _cols = std::get<0>(tim::utility::console::get_columns());
|
||||
if(_cols > parser.get_help_width() + 8)
|
||||
parser.set_description_width(
|
||||
std::min<int>(_cols - parser.get_help_width() - 8, 120));
|
||||
|
||||
parser.enable_help();
|
||||
parser.enable_version("omnitrace-avail", OMNITRACE_ARGPARSE_VERSION_INFO);
|
||||
|
||||
parser.add_argument({ "--monochrome" }, "Disable colorized output")
|
||||
.max_count(1)
|
||||
.dtype("bool")
|
||||
.action([&](parser_t& p) {
|
||||
auto _monochrome = p.get<bool>("monochrome");
|
||||
tim::log::monochrome() = _monochrome;
|
||||
p.set_use_color(!_monochrome);
|
||||
});
|
||||
parser.add_argument({ "--debug" }, "Enable debug messages")
|
||||
.max_count(1)
|
||||
.action([](parser_t& p) { debug_msg = p.get<bool>("debug"); });
|
||||
@@ -468,12 +485,20 @@ main(int argc, char** argv)
|
||||
}
|
||||
|
||||
#if OMNITRACE_USE_HIP > 0
|
||||
// initialize HIP and call rocm_metrics() which add choices to OMNITRACE_ROCM_EVENTS
|
||||
// setting
|
||||
auto _status = hipGetDeviceCount(&gpu_count);
|
||||
if(gpu_count > 0 && _status == hipSuccess)
|
||||
if(gpu_count > 0)
|
||||
{
|
||||
(void) omnitrace::rocprofiler::rocm_metrics();
|
||||
size_t _num_metrics = 0;
|
||||
try
|
||||
{
|
||||
// call to rocm_metrics() will add choices to OMNITRACE_ROCM_EVENTS setting
|
||||
// so always perform this call even if list of HW counters is not requested
|
||||
_num_metrics = omnitrace::rocprofiler::rocm_metrics().size();
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
verbprintf(0, "Retrieving the GPU HW counters failed: %s", _e.what());
|
||||
}
|
||||
verbprintf(0, "Found %i HIP devices and %zu GPU HW counters\n", gpu_count,
|
||||
_num_metrics);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -116,6 +116,8 @@ extern std::string settings_rexclude_exact;
|
||||
// leading matches, e.g. OMNITRACE_MPI_[A-Z_]+
|
||||
extern std::string settings_rexclude_begin;
|
||||
|
||||
constexpr size_t max_error_message_buffer_length = 4096;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
// functions
|
||||
|
||||
@@ -149,18 +151,27 @@ file_exists(const std::string&);
|
||||
// control debug printf statements
|
||||
#define errprintf(LEVEL, ...) \
|
||||
{ \
|
||||
if(werror || LEVEL < 0) \
|
||||
if(LEVEL < verbose_level) \
|
||||
{ \
|
||||
if(debug_msg || verbose_level >= LEVEL) \
|
||||
{ \
|
||||
fprintf(stderr, "%s", tim::log::color::fatal()); \
|
||||
fprintf(stderr, "[omnitrace][avail] Error! " __VA_ARGS__); \
|
||||
char _buff[FUNCNAMELEN]; \
|
||||
sprintf(_buff, "[omnitrace][avail] Error! " __VA_ARGS__); \
|
||||
fprintf(stderr, "%s", tim::log::color::end()); \
|
||||
} \
|
||||
char _buff[max_error_message_buffer_length]; \
|
||||
snprintf(_buff, max_error_message_buffer_length, \
|
||||
"[omnitrace][avail] Error! " __VA_ARGS__); \
|
||||
throw std::runtime_error(std::string{ _buff }); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if(debug_msg || verbose_level >= LEVEL) \
|
||||
{ \
|
||||
fprintf(stderr, "%s", tim::log::color::warning()); \
|
||||
fprintf(stderr, "[omnitrace][avail] Warning! " __VA_ARGS__); \
|
||||
fprintf(stderr, "%s", tim::log::color::end()); \
|
||||
} \
|
||||
} \
|
||||
fflush(stderr); \
|
||||
}
|
||||
@@ -169,12 +180,19 @@ file_exists(const std::string&);
|
||||
#define verbprintf(LEVEL, ...) \
|
||||
{ \
|
||||
if(debug_msg || verbose_level >= LEVEL) \
|
||||
{ \
|
||||
fprintf(stderr, "%s", tim::log::color::info()); \
|
||||
fprintf(stderr, "[omnitrace][avail] " __VA_ARGS__); \
|
||||
fprintf(stderr, "%s", tim::log::color::end()); \
|
||||
} \
|
||||
fflush(stderr); \
|
||||
}
|
||||
|
||||
#define verbprintf_bare(LEVEL, ...) \
|
||||
{ \
|
||||
if(debug_msg || verbose_level >= LEVEL) fprintf(stderr, __VA_ARGS__); \
|
||||
if(debug_msg || verbose_level >= LEVEL) \
|
||||
{ \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
} \
|
||||
fflush(stderr); \
|
||||
}
|
||||
|
||||
@@ -549,8 +549,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env,
|
||||
});
|
||||
|
||||
parser.enable_help();
|
||||
parser.enable_version("omnitrace-causal", "v" OMNITRACE_VERSION_STRING,
|
||||
OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION);
|
||||
parser.enable_version("omnitrace-causal", OMNITRACE_ARGPARSE_VERSION_INFO);
|
||||
|
||||
auto _cols = std::get<0>(console::get_columns());
|
||||
if(_cols > parser.get_help_width() + 8)
|
||||
|
||||
+1
-2
@@ -461,8 +461,7 @@ main(int argc, char** argv)
|
||||
string_t extra_help = "-- <CMD> <ARGS>";
|
||||
|
||||
parser.enable_help();
|
||||
parser.enable_version("omnitrace", "v" OMNITRACE_VERSION_STRING,
|
||||
OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION);
|
||||
parser.enable_version("omnitrace-instrument", OMNITRACE_ARGPARSE_VERSION_INFO);
|
||||
|
||||
parser.add_argument({ "" }, "");
|
||||
parser.add_argument({ "[DEBUG OPTIONS]" }, "");
|
||||
|
||||
@@ -292,8 +292,7 @@ parse_args(int argc, char** argv, parser_data_t& _parser_data)
|
||||
});
|
||||
|
||||
parser.enable_help("", "Usage: omnitrace-run <OPTIONS> -- <COMMAND> <ARGS>");
|
||||
parser.enable_version("omnitrace-run", "v" OMNITRACE_VERSION_STRING,
|
||||
OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION);
|
||||
parser.enable_version("omnitrace-run", OMNITRACE_ARGPARSE_VERSION_INFO);
|
||||
|
||||
auto _cols = std::get<0>(console::get_columns());
|
||||
if(_cols > parser.get_help_width() + 8)
|
||||
|
||||
@@ -369,8 +369,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
|
||||
parser.set_use_color(true);
|
||||
parser.enable_help();
|
||||
parser.enable_version("omnitrace-sample", "v" OMNITRACE_VERSION_STRING,
|
||||
OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION);
|
||||
parser.enable_version("omnitrace-sample", OMNITRACE_ARGPARSE_VERSION_INFO);
|
||||
|
||||
auto _cols = std::get<0>(tim::utility::console::get_columns());
|
||||
if(_cols > parser.get_help_width() + 8)
|
||||
|
||||
@@ -30,6 +30,17 @@
|
||||
#define OMNITRACE_GIT_DESCRIBE "@OMNITRACE_GIT_DESCRIBE@"
|
||||
#define OMNITRACE_GIT_REVISION "@OMNITRACE_GIT_REVISION@"
|
||||
|
||||
// system info during compilation
|
||||
#define OMNITRACE_LIBRARY_ARCH "@CMAKE_LIBRARY_ARCHITECTURE@"
|
||||
#define OMNITRACE_SYSTEM_NAME "@CMAKE_SYSTEM_NAME@"
|
||||
#define OMNITRACE_SYSTEM_PROCESSOR "@CMAKE_SYSTEM_PROCESSOR@"
|
||||
#define OMNITRACE_SYSTEM_VERSION "@CMAKE_SYSTEM_VERSION@"
|
||||
|
||||
// compiler information
|
||||
#define OMNITRACE_COMPILER_ID "@CMAKE_CXX_COMPILER_ID@"
|
||||
#define OMNITRACE_COMPILER_VERSION "@CMAKE_CXX_COMPILER_VERSION@"
|
||||
#define OMNITRACE_COMPILER_STRING OMNITRACE_COMPILER_ID " v" OMNITRACE_COMPILER_VERSION
|
||||
|
||||
#define OMNITRACE_DEFAULT_ROCM_PATH "@ROCmVersion_DIR@"
|
||||
#define OMNITRACE_HIP_VERSION_STRING "@OMNITRACE_HIP_VERSION@"
|
||||
#define OMNITRACE_HIP_VERSION_MAJOR @OMNITRACE_HIP_VERSION_MAJOR@
|
||||
@@ -45,6 +56,27 @@
|
||||
((10000 * OMNITRACE_HIP_VERSION_MAJOR) + (100 * OMNITRACE_HIP_VERSION_MINOR) + \
|
||||
OMNITRACE_HIP_VERSION_PATCH)
|
||||
|
||||
#if OMNITRACE_HIP_VERSION_MAJOR > 0
|
||||
# define OMNITRACE_HIP_VERSION_COMPAT_STRING \
|
||||
"v@OMNITRACE_HIP_VERSION_MAJOR@.@OMNITRACE_HIP_VERSION_MINOR@.x"
|
||||
#else
|
||||
# define OMNITRACE_HIP_VERSION_COMPAT_STRING ""
|
||||
#endif
|
||||
|
||||
// this should be passed to argparse::argument_parser::enable_version
|
||||
// Example:
|
||||
// parser.enable_version(<name>, OMNITRACE_ARGPARSE_VERSION_INFO);
|
||||
#if !defined(OMNITRACE_ARGPARSE_VERSION_INFO)
|
||||
# define OMNITRACE_ARGPARSE_VERSION_INFO \
|
||||
"v" OMNITRACE_VERSION_STRING, OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION, \
|
||||
{ \
|
||||
{ "", OMNITRACE_LIBRARY_ARCH }, { "compiler", OMNITRACE_COMPILER_STRING }, \
|
||||
{ \
|
||||
"rocm", OMNITRACE_HIP_VERSION_COMPAT_STRING \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
#if !defined(OMNITRACE_MAX_THREADS)
|
||||
# define OMNITRACE_MAX_THREADS @OMNITRACE_MAX_THREADS@
|
||||
|
||||
@@ -255,6 +255,14 @@ configure_settings(bool _init)
|
||||
tim::manager::add_metadata("OMNITRACE_GIT_DESCRIBE", OMNITRACE_GIT_DESCRIBE);
|
||||
tim::manager::add_metadata("OMNITRACE_GIT_REVISION", OMNITRACE_GIT_REVISION);
|
||||
|
||||
tim::manager::add_metadata("OMNITRACE_LIBRARY_ARCH", OMNITRACE_LIBRARY_ARCH);
|
||||
tim::manager::add_metadata("OMNITRACE_SYSTEM_NAME", OMNITRACE_SYSTEM_NAME);
|
||||
tim::manager::add_metadata("OMNITRACE_SYSTEM_PROCESSOR", OMNITRACE_SYSTEM_PROCESSOR);
|
||||
tim::manager::add_metadata("OMNITRACE_SYSTEM_VERSION", OMNITRACE_SYSTEM_VERSION);
|
||||
|
||||
tim::manager::add_metadata("OMNITRACE_COMPILER_ID", OMNITRACE_COMPILER_ID);
|
||||
tim::manager::add_metadata("OMNITRACE_COMPILER_VERSION", OMNITRACE_COMPILER_VERSION);
|
||||
|
||||
#if OMNITRACE_HIP_VERSION > 0
|
||||
tim::manager::add_metadata("OMNITRACE_HIP_VERSION", OMNITRACE_HIP_VERSION_STRING);
|
||||
tim::manager::add_metadata("OMNITRACE_HIP_VERSION_MAJOR",
|
||||
@@ -1511,38 +1519,37 @@ print_banner(std::ostream& _os)
|
||||
\______/ |__| |__| |__| \__| |__| |__| | _| `._____/__/ \__\ \______||_______|
|
||||
|
||||
)banner";
|
||||
auto _tag = std::string_view{ OMNITRACE_GIT_DESCRIBE };
|
||||
auto _rev = std::string_view{ OMNITRACE_GIT_REVISION };
|
||||
#if OMNITRACE_HIP_VERSION_MAJOR > 0
|
||||
auto _hip = JOIN('.', OMNITRACE_HIP_VERSION_MAJOR, OMNITRACE_HIP_VERSION_MINOR, "x");
|
||||
#else
|
||||
auto _hip = std::string_view{};
|
||||
#endif
|
||||
|
||||
std::stringstream _version_info{};
|
||||
_version_info << "omnitrace v" << OMNITRACE_VERSION_STRING;
|
||||
if(!_tag.empty() || !_rev.empty() || !_hip.empty())
|
||||
{
|
||||
_version_info << " (";
|
||||
if(!_tag.empty())
|
||||
{
|
||||
_version_info << "tag: " << OMNITRACE_GIT_DESCRIBE;
|
||||
if(!_rev.empty()) _version_info << ", ";
|
||||
}
|
||||
|
||||
if(!_rev.empty())
|
||||
{
|
||||
_version_info << "rev: " << OMNITRACE_GIT_REVISION;
|
||||
if(!_hip.empty()) _version_info << ", ";
|
||||
}
|
||||
namespace join = ::timemory::join;
|
||||
|
||||
if(!_hip.empty())
|
||||
{
|
||||
_version_info << "rocm: " << _hip;
|
||||
}
|
||||
}
|
||||
// assemble the list of properties
|
||||
auto _generate_properties =
|
||||
[](std::initializer_list<std::pair<std::string, std::string>>&& _data) {
|
||||
auto _property_info = std::vector<std::string>{};
|
||||
_property_info.reserve(_data.size());
|
||||
for(const auto& itr : _data)
|
||||
{
|
||||
if(!itr.second.empty())
|
||||
_property_info.emplace_back(
|
||||
itr.first.empty() ? itr.second
|
||||
: join::join(": ", itr.first, itr.second));
|
||||
}
|
||||
return _property_info;
|
||||
};
|
||||
|
||||
if(!_version_info.str().empty()) _version_info << ")";
|
||||
auto _properties =
|
||||
_generate_properties({ { "rev", OMNITRACE_GIT_REVISION },
|
||||
{ "tag", OMNITRACE_GIT_DESCRIBE },
|
||||
{ "", OMNITRACE_LIBRARY_ARCH },
|
||||
{ "compiler", OMNITRACE_COMPILER_STRING },
|
||||
{ "rocm", OMNITRACE_HIP_VERSION_COMPAT_STRING } });
|
||||
|
||||
// <NAME> <VERSION> (<PROPERTIES>)
|
||||
if(!_properties.empty())
|
||||
_version_info << join::join(join::array_config{ ", ", " (", ")" }, _properties);
|
||||
|
||||
tim::log::stream(_os, tim::log::color::info()) << _banner << _version_info.str();
|
||||
_os << std::endl;
|
||||
|
||||
@@ -124,12 +124,12 @@ start()
|
||||
}
|
||||
else
|
||||
{
|
||||
OMNITRACE_VERBOSE(0, "Resuming perfetto...\n");
|
||||
OMNITRACE_VERBOSE(2, "Resuming perfetto...\n");
|
||||
_tmp_file->fopen("a+");
|
||||
}
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE(0, "Setup perfetto...\n");
|
||||
OMNITRACE_VERBOSE(2, "Setup perfetto...\n");
|
||||
int _fd = (_tmp_file) ? _tmp_file->fd : -1;
|
||||
auto& cfg = get_config();
|
||||
tracing_session->Setup(cfg, _fd);
|
||||
|
||||
+2
-6
@@ -45,17 +45,13 @@ namespace causal
|
||||
{
|
||||
namespace component
|
||||
{
|
||||
struct sample_rate
|
||||
: tim::component::empty_base
|
||||
, tim::concepts::component
|
||||
struct sample_rate : comp::empty_base
|
||||
{
|
||||
using value_type = void;
|
||||
static void sample(int = -1);
|
||||
};
|
||||
|
||||
struct backtrace
|
||||
: tim::component::empty_base
|
||||
, tim::concepts::component
|
||||
struct backtrace : comp::empty_base
|
||||
{
|
||||
using value_type = void;
|
||||
using sample_data_set_t = std::set<sample_data>;
|
||||
|
||||
@@ -38,9 +38,7 @@ namespace omnitrace
|
||||
{
|
||||
namespace causal
|
||||
{
|
||||
struct delay
|
||||
: tim::component::empty_base
|
||||
, tim::concepts::component
|
||||
struct delay : comp::empty_base
|
||||
{
|
||||
using value_type = void;
|
||||
|
||||
|
||||
+1
-3
@@ -45,9 +45,7 @@ namespace omnitrace
|
||||
{
|
||||
namespace component
|
||||
{
|
||||
struct backtrace
|
||||
: tim::component::empty_base
|
||||
, tim::concepts::component
|
||||
struct backtrace : comp::empty_base
|
||||
{
|
||||
static constexpr size_t stack_depth = OMNITRACE_MAX_UNWIND_DEPTH;
|
||||
|
||||
|
||||
+1
-3
@@ -51,9 +51,7 @@ using type_list = ::tim::type_list<Tp...>;
|
||||
|
||||
namespace component
|
||||
{
|
||||
struct backtrace_metrics
|
||||
: tim::component::empty_base
|
||||
, concepts::component
|
||||
struct backtrace_metrics : comp::empty_base
|
||||
{
|
||||
static constexpr size_t num_hw_counters = TIMEMORY_PAPI_ARRAY_SIZE;
|
||||
|
||||
|
||||
+1
-3
@@ -38,9 +38,7 @@ namespace omnitrace
|
||||
{
|
||||
namespace component
|
||||
{
|
||||
struct backtrace_timestamp
|
||||
: tim::component::empty_base
|
||||
, tim::concepts::component
|
||||
struct backtrace_timestamp : comp::empty_base
|
||||
{
|
||||
using value_type = void;
|
||||
|
||||
|
||||
+2
-3
@@ -34,12 +34,11 @@ namespace omnitrace
|
||||
namespace component
|
||||
{
|
||||
struct cpu_freq
|
||||
: tim::concepts::component
|
||||
, tim::component::empty_base
|
||||
: comp::empty_base
|
||||
, tim::component::base_format<cpu_freq>
|
||||
, tim::component::base_data<std::vector<uint64_t>, 1>
|
||||
{
|
||||
using base_type = tim::component::empty_base;
|
||||
using base_type = comp::empty_base;
|
||||
using this_type = cpu_freq;
|
||||
using value_type = std::vector<uint64_t>;
|
||||
using storage_type = tim::storage<cpu_freq, value_type>;
|
||||
|
||||
@@ -127,6 +127,8 @@ extern "C"
|
||||
|
||||
void OnLoadToolProp(rocprofiler_settings_t* settings)
|
||||
{
|
||||
using ::rocprofiler::util::HsaRsrcFactory;
|
||||
|
||||
OMNITRACE_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Loading...\n");
|
||||
|
||||
rocm::lock_t _lk{ rocm::rocm_mutex, std::defer_lock };
|
||||
@@ -332,6 +334,8 @@ extern "C"
|
||||
}
|
||||
else
|
||||
{
|
||||
using ::rocprofiler::util::HsaRsrcFactory;
|
||||
|
||||
HsaRsrcFactory::Instance().PrintGpuAgents("ROCm");
|
||||
}
|
||||
|
||||
|
||||
+128
-42
@@ -28,6 +28,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
@@ -43,8 +44,23 @@ THE SOFTWARE.
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
// Demangle a C++ symbol name.
//
// Returns a NUL-terminated string holding the demangled form of `symname`; when
// `symname` cannot be demangled, a copy of `symname` itself is returned instead.
// A null `symname` yields nullptr rather than handing a null pointer to strdup().
//
// Ownership: the returned buffer is allocated by abi::__cxa_demangle() or
// strdup(), i.e. with malloc(), so the caller must release it with free() --
// not with delete / delete[].
static const char*
cpp_demangle(const char* symname)
{
    if(symname == nullptr) return nullptr;

    // With a null output buffer __cxa_demangle allocates the result itself, so no
    // length argument is needed; status is initialized in case it is left untouched.
    int         status    = 0;
    const char* demangled = abi::__cxa_demangle(symname, nullptr, nullptr, &status);
    if(demangled != nullptr) return demangled;

    // Not a mangled name (or demangling failed): fall back to a copy of the input
    // so the caller can treat both outcomes identically.
    return strdup(symname);
}
|
||||
|
||||
// Callback function to get available in the system agents
|
||||
hsa_status_t
|
||||
HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data)
|
||||
@@ -168,10 +184,20 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa)
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == nullptr) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// Time correlation
|
||||
const uint32_t corr_iters = 1000;
|
||||
for(unsigned time_id = 0; time_id < HsaTimer::TIME_ID_NUMBER; time_id += 1)
|
||||
{
|
||||
CorrelateTime((HsaTimer::time_id_t) time_id, corr_iters);
|
||||
}
|
||||
|
||||
// System timeout
|
||||
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX)
|
||||
? timeout_ns_
|
||||
: timer_->ns_to_sysclock(timeout_ns_);
|
||||
|
||||
// To dump code objects
|
||||
to_dump_code_obj_ = getenv("ROCP_DUMP_CODEOBJ");
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
@@ -214,12 +240,12 @@ HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table)
|
||||
|
||||
hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
|
||||
hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed =
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed =
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_add_write_index_scacq_screl =
|
||||
table->core_->hsa_queue_add_write_index_scacq_screl_fn;
|
||||
|
||||
hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
|
||||
hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
|
||||
@@ -236,7 +262,8 @@ HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table)
|
||||
table->core_->hsa_executable_create_alt_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object =
|
||||
table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_destroy = table->core_->hsa_executable_destroy_fn;
|
||||
hsa_api_.hsa_executable_get_symbol =
|
||||
table->core_->hsa_executable_get_symbol_fn;
|
||||
hsa_api_.hsa_executable_symbol_get_info =
|
||||
@@ -277,12 +304,12 @@ HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table)
|
||||
|
||||
hsa_api_.hsa_queue_create = hsa_queue_create;
|
||||
hsa_api_.hsa_queue_destroy = hsa_queue_destroy;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
hsa_queue_load_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed =
|
||||
hsa_queue_store_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed =
|
||||
hsa_queue_load_read_index_relaxed;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
hsa_queue_load_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_add_write_index_scacq_screl =
|
||||
hsa_queue_add_write_index_scacq_screl;
|
||||
|
||||
hsa_api_.hsa_signal_create = hsa_signal_create;
|
||||
hsa_api_.hsa_signal_destroy = hsa_signal_destroy;
|
||||
@@ -297,6 +324,7 @@ HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table)
|
||||
hsa_api_.hsa_executable_load_agent_code_object =
|
||||
hsa_executable_load_agent_code_object;
|
||||
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
|
||||
hsa_api_.hsa_executable_destroy = hsa_executable_destroy;
|
||||
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
|
||||
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
|
||||
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
|
||||
@@ -400,8 +428,10 @@ HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent)
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
strncpy(agent_info->gfxip, agent_info->name, 4);
|
||||
agent_info->gfxip[4] = '\0';
|
||||
const int gfxip_label_len =
|
||||
std::min(strlen(agent_info->name) - 2, sizeof(agent_info->gfxip) - 1);
|
||||
memcpy(agent_info->gfxip, agent_info->name, gfxip_label_len);
|
||||
agent_info->gfxip[gfxip_label_len] = '\0';
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
|
||||
&agent_info->max_wave_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
|
||||
@@ -437,7 +467,12 @@ HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent)
|
||||
agent_info->vgpr_block_size = 4;
|
||||
|
||||
// Set GPU index
|
||||
agent_info->dev_index = gpu_list_.size();
|
||||
uint32_t driver_node_id;
|
||||
status = hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
|
||||
&driver_node_id);
|
||||
CHECK_STATUS("hsa_agent_get_info(gpu hsa_driver_node_id)", status);
|
||||
agent_info->dev_index = driver_node_id;
|
||||
gpu_list_.push_back(agent_info);
|
||||
gpu_agents_.push_back(agent);
|
||||
}
|
||||
@@ -646,22 +681,27 @@ HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size)
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
void
|
||||
HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const
|
||||
hsa_signal_value_t
|
||||
HsaRsrcFactory::SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const
|
||||
{
|
||||
const hsa_signal_value_t exp_value = signal_value - 1;
|
||||
hsa_signal_value_t ret_value = signal_value;
|
||||
while(true)
|
||||
{
|
||||
const hsa_signal_value_t signal_value = hsa_api_.hsa_signal_wait_scacquire(
|
||||
signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if(signal_value == 0)
|
||||
ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT,
|
||||
signal_value, timeout_,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
if(ret_value == exp_value) break;
|
||||
if(ret_value != signal_value)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
CHECK_STATUS("hsa_signal_wait_scacquire()", HSA_STATUS_ERROR);
|
||||
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value("
|
||||
<< signal_value << "), ret_value(" << ret_value << ")" << std::endl
|
||||
<< std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
@@ -669,7 +709,7 @@ void
|
||||
HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const
|
||||
{
|
||||
SignalWait(signal);
|
||||
SignalWait(signal, signal_value);
|
||||
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
|
||||
@@ -686,7 +726,7 @@ HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, siz
|
||||
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size,
|
||||
0, nullptr, s);
|
||||
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
|
||||
SignalWait(s);
|
||||
SignalWait(s, 1);
|
||||
status = hsa_api_.hsa_signal_destroy(s);
|
||||
CHECK_STATUS("hsa_signal_destroy()", status);
|
||||
}
|
||||
@@ -766,6 +806,8 @@ HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_pa
|
||||
agent_info->dev_id, 0, &kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
close(file_handle);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
@@ -808,20 +850,36 @@ HsaRsrcFactory::PrintGpuAgents(const std::string&)
|
||||
return true;
|
||||
}
|
||||
|
||||
void*
|
||||
HsaRsrcFactory::GetSlotPointer(hsa_queue_t* queue, const uint64_t& idx)
|
||||
{
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
const uint32_t slot_idx = (uint32_t)(idx % queue->size);
|
||||
void* queue_slot = reinterpret_cast<void*>((uintptr_t)(queue->base_address) +
|
||||
(slot_idx * slot_size_b));
|
||||
return queue_slot;
|
||||
}
|
||||
|
||||
void*
|
||||
HsaRsrcFactory::GetReadPointer(hsa_queue_t* queue)
|
||||
{
|
||||
const uint64_t read_idx = hsa_api_.hsa_queue_load_read_index_relaxed(queue);
|
||||
return GetSlotPointer(queue, read_idx);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet)
|
||||
{
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
|
||||
// advance command queue
|
||||
const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue);
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1);
|
||||
const uint64_t write_idx = hsa_api_.hsa_queue_add_write_index_scacq_screl(queue, 1);
|
||||
while((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size)
|
||||
{
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
const uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
uint32_t* queue_slot = reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) +
|
||||
(slot_idx * slot_size_b));
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
@@ -862,29 +920,32 @@ HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes
|
||||
}
|
||||
|
||||
const char*
|
||||
HsaRsrcFactory::GetKernelName(uint64_t addr)
|
||||
HsaRsrcFactory::GetKernelNameRef(uint64_t addr)
|
||||
{
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if(it == symbols_map_->end())
|
||||
{
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
fprintf(stderr,
|
||||
"HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n",
|
||||
addr);
|
||||
abort();
|
||||
}
|
||||
return strdup(it->second);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void
|
||||
HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table)
|
||||
{
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
executable_tracking_on_ = true;
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
executable_tracking_on_ = true;
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
table->core_->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor;
|
||||
}
|
||||
|
||||
hsa_status_t
|
||||
HsaRsrcFactory::executable_symbols_cb(hsa_executable_t /*exec*/,
|
||||
hsa_executable_symbol_t symbol, void* /*data*/)
|
||||
hsa_executable_symbol_t symbol, void* data)
|
||||
{
|
||||
hsa_symbol_kind_t value = (hsa_symbol_kind_t) 0;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
@@ -900,17 +961,26 @@ HsaRsrcFactory::executable_symbols_cb(hsa_executable_t /*exec*/,
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char* name = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
|
||||
char* symname = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, symname);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
name[len] = 0;
|
||||
auto ret = symbols_map_->insert({ addr, name });
|
||||
if(ret.second == false)
|
||||
symname[len] = 0;
|
||||
if(data == nullptr)
|
||||
{
|
||||
delete[] ret.first->second;
|
||||
ret.first->second = name;
|
||||
const char* name = cpp_demangle(symname);
|
||||
auto ret = symbols_map_->insert({ addr, name });
|
||||
if(ret.second == false)
|
||||
{
|
||||
delete[] ret.first->second;
|
||||
ret.first->second = name;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
symbols_map_->erase(addr);
|
||||
}
|
||||
delete[] symname;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -925,7 +995,19 @@ HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
executable, executable_symbols_cb, nullptr);
|
||||
CHECK_STATUS("Error in iterating executable symbols", status);
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);
|
||||
;
|
||||
}
|
||||
|
||||
// Interceptor installed in place of hsa_executable_destroy.
//
// While holding mutex_, and only if a symbols map exists, iterates the symbols of
// `executable` with executable_symbols_cb, passing a non-null `data` argument; the
// callback treats non-null `data` as a request to erase the symbol's entry from
// symbols_map_ rather than insert it. Afterwards the call is forwarded to the
// saved hsa_executable_destroy entry point and its status is returned.
hsa_status_t
HsaRsrcFactory::hsa_executable_destroy_interceptor(hsa_executable_t executable)
{
    std::lock_guard<mutex_t> guard(mutex_);

    if(symbols_map_)
    {
        // any non-null value works as the "erase" flag; the pointer is never dereferenced
        void* const  erase_flag     = (void*) 1;
        hsa_status_t iterate_status = hsa_api_.hsa_executable_iterate_symbols(
            executable, executable_symbols_cb, erase_flag);
        CHECK_STATUS("Error in iterating executable symbols", iterate_status);
    }

    return hsa_api_.hsa_executable_destroy(executable);
}
|
||||
|
||||
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
|
||||
@@ -934,3 +1016,7 @@ HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP
|
||||
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
|
||||
bool HsaRsrcFactory::executable_tracking_on_ = false;
|
||||
HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = nullptr;
|
||||
void* HsaRsrcFactory::to_dump_code_obj_ = nullptr;
|
||||
|
||||
} // namespace util
|
||||
} // namespace rocprofiler
|
||||
|
||||
+122
-12
@@ -38,6 +38,7 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
@@ -77,6 +78,10 @@
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
static const size_t MEM_PAGE_BYTES = 0x1000;
|
||||
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
|
||||
@@ -88,11 +93,12 @@ struct hsa_pfn_t
|
||||
decltype(::hsa_agent_get_info)* hsa_agent_get_info;
|
||||
decltype(::hsa_iterate_agents)* hsa_iterate_agents;
|
||||
|
||||
decltype(::hsa_queue_create)* hsa_queue_create;
|
||||
decltype(::hsa_queue_destroy)* hsa_queue_destroy;
|
||||
decltype(::hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
|
||||
decltype(::hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
|
||||
decltype(::hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
|
||||
decltype(::hsa_queue_create)* hsa_queue_create;
|
||||
decltype(::hsa_queue_destroy)* hsa_queue_destroy;
|
||||
decltype(::hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
|
||||
decltype(::hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
|
||||
decltype(
|
||||
::hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl;
|
||||
|
||||
decltype(::hsa_signal_create)* hsa_signal_create;
|
||||
decltype(::hsa_signal_destroy)* hsa_signal_destroy;
|
||||
@@ -107,6 +113,7 @@ struct hsa_pfn_t
|
||||
decltype(
|
||||
::hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(::hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(::hsa_executable_destroy)* hsa_executable_destroy;
|
||||
decltype(::hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
decltype(::hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
|
||||
decltype(::hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
|
||||
@@ -180,10 +187,11 @@ struct AgentInfo
|
||||
// Number of Shader Arrays Per Shader Engines in Gpu
|
||||
uint32_t shader_arrays_per_se;
|
||||
|
||||
// SGPR/VGPR block sizes
|
||||
uint32_t sgpr_block_dflt;
|
||||
uint32_t sgpr_block_size;
|
||||
uint32_t vgpr_block_size;
|
||||
// SGPR/VGPR/LDS block sizes
|
||||
uint32_t sgpr_block_dflt;
|
||||
uint32_t sgpr_block_size;
|
||||
uint32_t vgpr_block_size;
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
};
|
||||
|
||||
// HSA timer class
|
||||
@@ -195,6 +203,16 @@ public:
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
// Identifiers of the clocks that a profiling timestamp can be correlated with.
// Values are sequential starting at zero so they can index fixed-size arrays.
enum time_id_t
{
    TIME_ID_CLOCK_REALTIME = 0,      // 0: mapped to CLOCK_REALTIME
    TIME_ID_CLOCK_REALTIME_COARSE,   // 1: mapped to CLOCK_REALTIME_COARSE
    TIME_ID_CLOCK_MONOTONIC,         // 2: mapped to CLOCK_MONOTONIC
    TIME_ID_CLOCK_MONOTONIC_COARSE,  // 3: mapped to CLOCK_MONOTONIC_COARSE
    TIME_ID_CLOCK_MONOTONIC_RAW,     // 4: mapped to CLOCK_MONOTONIC_RAW
    TIME_ID_NUMBER                   // 5: number of valid ids (not a clock itself)
};
|
||||
|
||||
HsaTimer(const hsa_pfn_t* hsa_api)
|
||||
: hsa_api_(hsa_api)
|
||||
{
|
||||
@@ -215,6 +233,12 @@ public:
|
||||
return timestamp_t((freq_t) time / sysclock_factor_);
|
||||
}
|
||||
|
||||
// Method for timespec/ns conversion
|
||||
static timestamp_t timespec_to_ns(const timespec& time)
|
||||
{
|
||||
return ((timestamp_t) time.tv_sec * 1000000000) + time.tv_nsec;
|
||||
}
|
||||
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const
|
||||
{
|
||||
@@ -225,6 +249,57 @@ public:
|
||||
return sysclock_to_ns(sysclock);
|
||||
}
|
||||
|
||||
// Return time in 'ns'
|
||||
timestamp_t clocktime_ns(clockid_t clock_id) const
|
||||
{
|
||||
timespec time;
|
||||
clock_gettime(clock_id, &time);
|
||||
return timespec_to_ns(time);
|
||||
}
|
||||
|
||||
// Return pair of correlated values of profiling timestamp and time with
|
||||
// correlation error for a given time ID and number of iterations
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v,
|
||||
timestamp_t* time_v, timestamp_t* error_v)
|
||||
{
|
||||
clockid_t clock_id = 0;
|
||||
switch(time_id)
|
||||
{
|
||||
case TIME_ID_CLOCK_REALTIME: clock_id = CLOCK_REALTIME; break;
|
||||
case TIME_ID_CLOCK_REALTIME_COARSE: clock_id = CLOCK_REALTIME_COARSE; break;
|
||||
case TIME_ID_CLOCK_MONOTONIC: clock_id = CLOCK_MONOTONIC; break;
|
||||
case TIME_ID_CLOCK_MONOTONIC_COARSE: clock_id = CLOCK_MONOTONIC_COARSE; break;
|
||||
case TIME_ID_CLOCK_MONOTONIC_RAW: clock_id = CLOCK_MONOTONIC_RAW; break;
|
||||
default: CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
std::vector<timestamp_t> ts_vec(iters);
|
||||
std::vector<timespec> tm_vec(iters);
|
||||
const uint32_t steps = iters - 1;
|
||||
|
||||
for(uint32_t i = 0; i < iters; ++i)
|
||||
{
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]);
|
||||
clock_gettime(clock_id, &tm_vec[i]);
|
||||
}
|
||||
|
||||
const timestamp_t ts_base = sysclock_to_ns(ts_vec.front());
|
||||
const timestamp_t tm_base = timespec_to_ns(tm_vec.front());
|
||||
const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps);
|
||||
|
||||
timestamp_t ts_accum = 0;
|
||||
timestamp_t tm_accum = 0;
|
||||
for(uint32_t i = 0; i < iters; ++i)
|
||||
{
|
||||
ts_accum += (ts_vec[i] - ts_base);
|
||||
tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base);
|
||||
}
|
||||
|
||||
*timestamp_v = (ts_accum / iters) + ts_base + error;
|
||||
*time_v = (tm_accum / iters) + tm_base;
|
||||
*error_v = error;
|
||||
}
|
||||
|
||||
private:
|
||||
// Timestamp frequency factor
|
||||
freq_t sysclock_factor_;
|
||||
@@ -332,7 +407,8 @@ public:
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
void SignalWait(const hsa_signal_t& signal) const;
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal,
|
||||
@@ -359,14 +435,16 @@ public:
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Submit AQL packet to given queue
|
||||
// Utils for submitting AQL packet to a given queue
|
||||
static void* GetSlotPointer(hsa_queue_t* queue, const uint64_t& idx);
|
||||
static void* GetReadPointer(hsa_queue_t* queue);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
|
||||
|
||||
// Enable executables loading tracking
|
||||
static bool IsExecutableTracking() { return executable_tracking_on_; }
|
||||
static void EnableExecutableTracking(HsaApiTable* table);
|
||||
static const char* GetKernelName(uint64_t addr);
|
||||
static const char* GetKernelNameRef(uint64_t addr);
|
||||
|
||||
// Initialize HSA API table
|
||||
void static InitHsaApiTable(HsaApiTable* table);
|
||||
@@ -400,6 +478,29 @@ public:
|
||||
Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
|
||||
}
|
||||
|
||||
// Correlate the profiling timestamp with the clock selected by `time_id`, using
// `iters` samples, and cache the results for later conversions:
//  - time_shift_[time_id] holds (clock time - profiling timestamp), which
//    GetTimeVal adds to a timestamp to obtain a clock value;
//  - time_error_[time_id] holds the correlation error reported by the timer.
void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters)
{
    timestamp_t timestamp_v = 0;
    timestamp_t time_v      = 0;
    timestamp_t error_v     = 0;
    timer_->correlated_pair_ns(time_id, iters, &timestamp_v, &time_v, &error_v);
    time_shift_[time_id] = time_v - timestamp_v;
    time_error_[time_id] = error_v;
}
|
||||
|
||||
// Translate a profiling timestamp into the clock domain selected by `time_id` by
// adding the shift stored in time_shift_ for that id.
// Returns HSA_STATUS_ERROR when `time_id` is not below HsaTimer::TIME_ID_NUMBER
// (in which case `*time_value` is left untouched), HSA_STATUS_SUCCESS otherwise.
hsa_status_t GetTimeVal(uint32_t time_id, uint64_t time_stamp, uint64_t* time_value)
{
    const bool id_in_range = (time_id < HsaTimer::TIME_ID_NUMBER);
    if(!id_in_range)
    {
        return HSA_STATUS_ERROR;
    }

    const timestamp_t shift = time_shift_[time_id];
    *time_value             = time_stamp + shift;
    return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Fetch the correlation error stored in time_error_ for the clock selected by
// `time_id`.
// Returns HSA_STATUS_ERROR when `time_id` is not below HsaTimer::TIME_ID_NUMBER
// (in which case `*err` is left untouched), HSA_STATUS_SUCCESS otherwise.
hsa_status_t GetTimeErr(uint32_t time_id, uint64_t* err)
{
    // time_error_ has exactly TIME_ID_NUMBER entries; reject out-of-range ids
    // the same way GetTimeVal does instead of reading past the array.
    if(time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
    *err = time_error_[time_id];
    return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
private:
|
||||
// System agents iterating callback
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
|
||||
@@ -444,8 +545,10 @@ private:
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static void* to_dump_code_obj_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options);
|
||||
static hsa_status_t hsa_executable_destroy_interceptor(hsa_executable_t executable);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol, void* data);
|
||||
|
||||
@@ -466,7 +569,14 @@ private:
|
||||
// HSA timer
|
||||
HsaTimer* timer_;
|
||||
|
||||
// Time shift array to support time conversion
|
||||
timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER];
|
||||
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t* cpu_pool_;
|
||||
hsa_amd_memory_pool_t* kern_arg_pool_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -56,6 +56,9 @@ namespace rocprofiler
|
||||
{
|
||||
namespace
|
||||
{
|
||||
using ::rocprofiler::util::AgentInfo;
|
||||
using ::rocprofiler::util::HsaRsrcFactory;
|
||||
|
||||
auto&
|
||||
get_event_names()
|
||||
{
|
||||
@@ -271,30 +274,25 @@ metrics_input(unsigned _device, rocprofiler_feature_t** ret)
|
||||
return feature_count;
|
||||
}
|
||||
|
||||
struct info_data
|
||||
{
|
||||
const AgentInfo* agent = nullptr;
|
||||
std::vector<component::rocm_info_entry>* data = nullptr;
|
||||
};
|
||||
using info_data = std::vector<component::rocm_info_entry>;
|
||||
|
||||
hsa_status_t
|
||||
info_data_callback(const rocprofiler_info_data_t info, void* arg)
|
||||
{
|
||||
using qualifier_t = tim::hardware_counters::qualifier;
|
||||
using qualifier_vec_t = std::vector<qualifier_t>;
|
||||
auto* _arg = static_cast<info_data*>(arg);
|
||||
const auto* _agent = _arg->agent;
|
||||
auto* _data = _arg->data;
|
||||
auto* _data = static_cast<info_data*>(arg);
|
||||
auto _dev_index = info.agent_index;
|
||||
|
||||
switch(info.kind)
|
||||
{
|
||||
case ROCPROFILER_INFO_KIND_METRIC:
|
||||
{
|
||||
auto _device_qualifier_sym = JOIN("", ":device=", _agent->dev_index);
|
||||
auto _device_qualifier = tim::hardware_counters::qualifier{
|
||||
true, static_cast<int>(_agent->dev_index), _device_qualifier_sym,
|
||||
JOIN(" ", "Device", _agent->dev_index)
|
||||
};
|
||||
auto _device_qualifier_sym = JOIN("", ":device=", _dev_index);
|
||||
auto _device_qualifier =
|
||||
tim::hardware_counters::qualifier{ true, static_cast<int>(_dev_index),
|
||||
_device_qualifier_sym,
|
||||
JOIN(" ", "Device", _dev_index) };
|
||||
auto _long_desc = std::string{ info.metric.description };
|
||||
auto _units = std::string{};
|
||||
auto _pysym = std::string{};
|
||||
@@ -313,7 +311,7 @@ info_data_callback(const rocprofiler_info_data_t info, void* arg)
|
||||
{
|
||||
auto _sym = JOIN("", info.metric.name, _device_qualifier_sym);
|
||||
auto _short_desc =
|
||||
JOIN("", info.metric.name, " on device ", _agent->dev_index);
|
||||
JOIN("", info.metric.name, " on device ", _dev_index);
|
||||
_data->emplace_back(component::rocm_info_entry(
|
||||
true, tim::hardware_counters::api::rocm, _data->size(), 0, _sym,
|
||||
_pysym, _short_desc, _long_desc, _units,
|
||||
@@ -331,7 +329,7 @@ info_data_callback(const rocprofiler_info_data_t info, void* arg)
|
||||
auto _sym = JOIN("", info.metric.name, _instance_qualifier_sym,
|
||||
_device_qualifier_sym);
|
||||
auto _short_desc = JOIN("", info.metric.name, " instance ", i,
|
||||
" on device ", _agent->dev_index);
|
||||
" on device ", _dev_index);
|
||||
_data->emplace_back(component::rocm_info_entry(
|
||||
true, tim::hardware_counters::api::rocm, _data->size(), 0,
|
||||
_sym, _pysym, _short_desc, _long_desc, _units,
|
||||
@@ -369,12 +367,26 @@ rocm_metrics()
|
||||
const AgentInfo** _agent_p = &_agent;
|
||||
HsaRsrcFactory::Instance().GetGpuAgentInfo(i, _agent_p);
|
||||
|
||||
auto _v = info_data{ _agent, &_data };
|
||||
if(!rocm_check_status(
|
||||
rocprofiler_iterate_info(&_agent->dev_id, ROCPROFILER_INFO_KIND_METRIC,
|
||||
info_data_callback, reinterpret_cast<void*>(&_v)),
|
||||
{ HSA_STATUS_ERROR_NOT_INITIALIZED }))
|
||||
return _data;
|
||||
if(!rocm_check_status(rocprofiler_iterate_info(
|
||||
&_agent->dev_id, ROCPROFILER_INFO_KIND_METRIC,
|
||||
info_data_callback, reinterpret_cast<void*>(&_data)),
|
||||
{ HSA_STATUS_ERROR_NOT_INITIALIZED }))
|
||||
{
|
||||
OMNITRACE_WARNING_F(-1, "rocprofiler_iterate_info failed for gpu agent %u\n",
|
||||
i);
|
||||
}
|
||||
}
|
||||
|
||||
if(gpu_count > 0 && _data.empty())
|
||||
{
|
||||
if(!rocm_check_status(rocprofiler_iterate_info(
|
||||
nullptr, ROCPROFILER_INFO_KIND_METRIC,
|
||||
info_data_callback, reinterpret_cast<void*>(&_data)),
|
||||
{ HSA_STATUS_ERROR_NOT_INITIALIZED }))
|
||||
{
|
||||
OMNITRACE_WARNING_F(-1, "rocprofiler_iterate_info failed for %i gpu agents\n",
|
||||
gpu_count);
|
||||
}
|
||||
}
|
||||
|
||||
auto _settings = tim::settings::shared_instance();
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele