From 3e64cedc0c208bf1982ecc35061c2f1ad63f6962 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Fri, 1 Nov 2024 23:12:51 -0500 Subject: [PATCH] SDK: create CMake option for strict checks on CPU vs. GPU timestamps (#1159) * SDK: create CMake option for strict checks on CPU vs. GPU timestamps - Configuring CMake with `ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS=ON` will enable fatal errors if dispatch/memcpy timestamps on GPU are outside of the start/end time from the CPU - `ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS` defaults to the value of `ROCPROFILER_BUILD_CI` * Formatting * Disable async_copy frequency scaling * Disable profiling dispatch time frequency scaling * Support runtime configuration via env variables - ROCPROFILER_CI_FREQ_SCALE_TIMESTAMPS env variable will enable scaling the timestamps based on the hsa timestamp period - ROCPROFILER_CI_STRICT_TIMESTAMPS env variable will enable strict timestamp checks - when cmake is configured with ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS=ON, this env variable defaults to true * ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS defaults to OFF * Update cmake-target * Common tracing::adjust_profiling_time --------- Co-authored-by: Gopesh Bhardwaj [ROCm/rocprofiler-sdk commit: ad48201912995e1db4f6e65266bce2792056b3c6] --- .../cmake/rocprofiler_build_settings.cmake | 5 + .../cmake/rocprofiler_options.cmake | 3 + .../lib/rocprofiler-sdk/hsa/async_copy.cpp | 64 +++------ .../kernel_dispatch/profiling_time.cpp | 90 +++--------- .../kernel_dispatch/profiling_time.hpp | 13 +- .../kernel_dispatch/tracing.hpp | 4 +- .../rocprofiler-sdk/tracing/CMakeLists.txt | 4 +- .../tracing/profiling_time.cpp | 101 ++++++++++++++ .../tracing/profiling_time.hpp | 129 ++++++++++++++++++ 9 files changed, 282 insertions(+), 131 deletions(-) create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.cpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp diff --git 
a/projects/rocprofiler-sdk/cmake/rocprofiler_build_settings.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_build_settings.cmake index 2ee13314c8..f1ddc2e7e0 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_build_settings.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_build_settings.cmake @@ -198,6 +198,11 @@ if(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) INTERFACE ROCPROFILER_UNSAFE_NO_VERSION_CHECK) endif() +if(ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS) + rocprofiler_target_compile_definitions(rocprofiler-sdk-build-flags + INTERFACE ROCPROFILER_CI_STRICT_TIMESTAMPS) +endif() + # ----------------------------------------------------------------------------------------# # user customization # diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake index 414c161f56..05b960694b 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake @@ -38,6 +38,9 @@ rocprofiler_add_option(ROCPROFILER_BUILD_TESTS "Enable building the tests" ${ROCPROFILER_BUILD_CI}) rocprofiler_add_option(ROCPROFILER_BUILD_SAMPLES "Enable building the code samples" ${ROCPROFILER_BUILD_CI}) +rocprofiler_add_option( + ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS + "Disable adjusting for clock skew b/t CPU and GPU timestamps" OFF ADVANCED) rocprofiler_add_option(ROCPROFILER_BUILD_CODECOV "Enable building for code coverage analysis" OFF) rocprofiler_add_option(ROCPROFILER_BUILD_DOCS diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp index 07f48a74b6..ddf94b4dd2 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp @@ -21,6 +21,8 @@ // THE SOFTWARE. 
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/environment.hpp" #include "lib/common/logging.hpp" #include "lib/common/scope_destructor.hpp" #include "lib/common/static_object.hpp" @@ -28,9 +30,9 @@ #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/hsa/hsa.hpp" -#include "lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/fwd.hpp" +#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" #include @@ -317,30 +319,6 @@ convert_hsa_handle(Up _hsa_object) return reinterpret_cast(_hsa_object.handle); } -hsa_amd_profiling_async_copy_time_t& -operator+=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs) -{ - lhs.start += rhs; - lhs.end += rhs; - return lhs; -} - -hsa_amd_profiling_async_copy_time_t& -operator-=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs) -{ - lhs.start -= rhs; - lhs.end -= rhs; - return lhs; -} - -hsa_amd_profiling_async_copy_time_t& -operator*=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs) -{ - lhs.start *= rhs; - lhs.end *= rhs; - return lhs; -} - bool async_copy_handler(hsa_signal_value_t signal_value, void* arg) { @@ -352,41 +330,38 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) return false; } - static auto sysclock_period = hsa::get_hsa_timestamp_period(); - auto ts = common::timestamp_ns(); auto* _data = static_cast(arg); auto copy_time = hsa_amd_profiling_async_copy_time_t{}; auto copy_time_status = get_amd_ext_table()->hsa_amd_profiling_get_async_copy_time_fn( _data->rocp_signal, &copy_time); - // normalize - copy_time *= sysclock_period; + auto _profile_time = tracing::profiling_time{copy_time_status, copy_time.start, copy_time.end}; - // below is a hack for clock skew issues: - // the timestamp of this handler for the copy will 
always be after when the copy ended - if(ts < copy_time.end) copy_time -= (copy_time.end - ts); + if(_profile_time.status == HSA_STATUS_SUCCESS) + { + _profile_time = tracing::adjust_profiling_time( + "memcpy", + _profile_time, + tracing::profiling_time{HSA_STATUS_SUCCESS, _data->start_ts, ts}); - // below is a hack for clock skew issues: - // the timestamp of the function call triggering the copy will always be before when the copy - // started - if(copy_time.start < _data->start_ts) copy_time += (_data->start_ts - copy_time.start); - - // if we encounter this in CI, it will cause test to fail - ROCP_CI_LOG_IF(ERROR, copy_time_status == HSA_STATUS_SUCCESS && copy_time.end < copy_time.start) - << "hsa_amd_profiling_get_async_copy_time for returned async times where the end time (" - << copy_time.end << ") was less than the start time (" << copy_time.start << ")"; + // if we encounter this in CI, it will cause test to fail + ROCP_CI_LOG_IF(ERROR, _profile_time.end < _profile_time.start) + << "hsa_amd_profiling_get_async_copy_time for returned async times where the end time (" + << _profile_time.end << ") was less than the start time (" << _profile_time.start + << ")"; + } // get the contexts that were active when the signal was created const auto& tracing_data = _data->tracing_data; // we need to decrement this reference count at the end of the functions auto* _corr_id = _data->correlation_id; - if(copy_time_status == HSA_STATUS_SUCCESS && !tracing_data.empty()) + if(_profile_time.status == HSA_STATUS_SUCCESS && !tracing_data.empty()) { if(!_data->tracing_data.callback_contexts.empty()) { - auto _tracer_data = _data->get_callback_data(copy_time.start, copy_time.end); + auto _tracer_data = _data->get_callback_data(_profile_time.start, _profile_time.end); tracing::execute_phase_exit_callbacks(_data->tracing_data.callback_contexts, _data->tracing_data.external_correlation_ids, @@ -397,7 +372,8 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) 
if(!_data->tracing_data.buffered_contexts.empty()) { - auto record = _data->get_buffered_record(nullptr, copy_time.start, copy_time.end); + auto record = + _data->get_buffered_record(nullptr, _profile_time.start, _profile_time.end); tracing::execute_buffer_record_emplace(_data->tracing_data.buffered_contexts, _data->tid, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.cpp index 677e64671b..0f50a3b3b8 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.cpp @@ -21,10 +21,13 @@ // THE SOFTWARE. #include "lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/environment.hpp" #include "lib/common/logging.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/hsa/hsa.hpp" +#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp" #include @@ -36,93 +39,35 @@ namespace rocprofiler { namespace kernel_dispatch { -namespace -{ -hsa_amd_profiling_dispatch_time_t& -operator+=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs) -{ - lhs.start += rhs; - lhs.end += rhs; - return lhs; -} - -hsa_amd_profiling_dispatch_time_t& -operator-=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs) -{ - lhs.start -= rhs; - lhs.end -= rhs; - return lhs; -} - -hsa_amd_profiling_dispatch_time_t& -operator*=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs) -{ - lhs.start *= rhs; - lhs.end *= rhs; - return lhs; -} -} // namespace - -profiling_time& -profiling_time::operator+=(uint64_t offset) -{ - start += offset; - end += offset; - return *this; -} - -profiling_time& -profiling_time::operator-=(uint64_t offset) -{ - start -= offset; - end -= offset; - return *this; -} - -profiling_time& 
-profiling_time::operator*=(uint64_t scale) -{ - start *= scale; - end *= scale; - return *this; -} - profiling_time get_dispatch_time(hsa_agent_t _hsa_agent, hsa_signal_t _signal, rocprofiler_kernel_id_t _kernel_id, std::optional _baseline) { - static auto sysclock_period = hsa::get_hsa_timestamp_period(); - auto ts = common::timestamp_ns(); auto dispatch_time = hsa_amd_profiling_dispatch_time_t{}; auto dispatch_time_status = hsa::get_amd_ext_table()->hsa_amd_profiling_get_dispatch_time_fn( _hsa_agent, _signal, &dispatch_time); - if(dispatch_time_status == HSA_STATUS_SUCCESS) + auto _profile_time = + tracing::profiling_time{dispatch_time_status, dispatch_time.start, dispatch_time.end}; + + if(_profile_time.status == HSA_STATUS_SUCCESS) { // if we encounter this in CI, it will cause test to fail - ROCP_CI_LOG_IF(ERROR, dispatch_time.end < dispatch_time.start) + ROCP_CI_LOG_IF(ERROR, _profile_time.end < _profile_time.start) << "hsa_amd_profiling_get_dispatch_time for kernel_id=" << _kernel_id << " on rocprofiler_agent=" - << CHECK_NOTNULL(agent::get_rocprofiler_agent(_hsa_agent))->id.handle - << " returned dispatch times where the end time (" << dispatch_time.end - << ") was less than the start time (" << dispatch_time.start << ")"; + << CHECK_NOTNULL(agent::get_rocprofiler_agent(_hsa_agent))->node_id + << " returned dispatch times where the end time (" << _profile_time.end + << ") was less than the start time (" << _profile_time.start << ")"; - // normalize - dispatch_time *= sysclock_period; - - // below is a hack for clock skew issues: - // the timestamp of this handler for the kernel dispatch will always be after when the - // kernel completed - if(ts < dispatch_time.end) dispatch_time -= (dispatch_time.end - ts); - - // below is a hack for clock skew issues: - // the timestamp of the packet rewriter for the kernel packet will always be before when the - // kernel started - if(_baseline && dispatch_time.start < *_baseline) - dispatch_time += (*_baseline - 
dispatch_time.start); + _profile_time = tracing::adjust_profiling_time( + "dispatch", + _profile_time, + tracing::profiling_time{ + HSA_STATUS_SUCCESS, _baseline.value_or(dispatch_time.start), ts}); } else { @@ -133,8 +78,7 @@ get_dispatch_time(hsa_agent_t _hsa_agent, << " :: " << hsa::get_hsa_status_string(dispatch_time_status); } - return profiling_time{ - .status = dispatch_time_status, .start = dispatch_time.start, .end = dispatch_time.end}; + return _profile_time; } } // namespace kernel_dispatch } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp index 6715e95a78..21ca307df2 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp @@ -22,6 +22,8 @@ #pragma once +#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp" + #include #include @@ -34,16 +36,7 @@ namespace rocprofiler { namespace kernel_dispatch { -struct profiling_time -{ - hsa_status_t status = HSA_STATUS_ERROR_INVALID_SIGNAL; - uint64_t start = 0; - uint64_t end = 0; - - profiling_time& operator+=(uint64_t offset); - profiling_time& operator-=(uint64_t offset); - profiling_time& operator*=(uint64_t scale); -}; +using profiling_time = tracing::profiling_time; // get the profiling time for a signal on an agent, if start time is less than baseline, correct to // start at baseline. 
If kernel_id is provided, it will be included in error log message if there is diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.hpp index 451080a841..cfb588648a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.hpp @@ -23,7 +23,7 @@ #pragma once #include "lib/rocprofiler-sdk/hsa/queue_info_session.hpp" -// #include "lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp" +#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp" #include #include @@ -45,7 +45,7 @@ using context_t = context::context; using user_data_map_t = std::unordered_map; using external_corr_id_map_t = user_data_map_t; -struct profiling_time; +using profiling_time = tracing::profiling_time; profiling_time get_dispatch_time(const hsa::queue_info_session& session); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/CMakeLists.txt index afb314c3ab..3201345482 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/CMakeLists.txt @@ -1,6 +1,6 @@ # -set(ROCPROFILER_LIB_TRACING_SOURCES) -set(ROCPROFILER_LIB_TRACING_HEADERS fwd.hpp tracing.hpp) +set(ROCPROFILER_LIB_TRACING_SOURCES profiling_time.cpp) +set(ROCPROFILER_LIB_TRACING_HEADERS fwd.hpp profiling_time.hpp tracing.hpp) target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_TRACING_SOURCES} ${ROCPROFILER_LIB_TRACING_HEADERS}) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.cpp new file mode 100644 index 0000000000..971286edf2 --- /dev/null +++ 
b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.cpp @@ -0,0 +1,101 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp" + +namespace rocprofiler +{ +namespace tracing +{ +hsa_amd_profiling_dispatch_time_t& +operator+=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs) +{ + lhs.start += rhs; + lhs.end += rhs; + return lhs; +} + +hsa_amd_profiling_dispatch_time_t& +operator-=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs) +{ + lhs.start -= rhs; + lhs.end -= rhs; + return lhs; +} + +hsa_amd_profiling_dispatch_time_t& +operator*=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs) +{ + lhs.start *= rhs; + lhs.end *= rhs; + return lhs; +} + +hsa_amd_profiling_async_copy_time_t& +operator+=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs) +{ + lhs.start += rhs; + lhs.end += rhs; + return lhs; +} + +hsa_amd_profiling_async_copy_time_t& +operator-=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs) +{ + lhs.start -= rhs; + lhs.end -= rhs; + return lhs; +} + +hsa_amd_profiling_async_copy_time_t& +operator*=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs) +{ + lhs.start *= rhs; + lhs.end *= rhs; + return lhs; +} + +profiling_time& +profiling_time::operator+=(uint64_t offset) +{ + start += offset; + end += offset; + return *this; +} + +profiling_time& +profiling_time::operator-=(uint64_t offset) +{ + start -= offset; + end -= offset; + return *this; +} + +profiling_time& +profiling_time::operator*=(uint64_t scale) +{ + start *= scale; + end *= scale; + return *this; +} +} // namespace tracing +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp new file mode 100644 index 0000000000..c0b0f8b882 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp @@ -0,0 +1,129 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include "lib/common/environment.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/mpl.hpp" +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" + +#include +#include + +#include + +#include + +namespace rocprofiler +{ +namespace tracing +{ +hsa_amd_profiling_dispatch_time_t& +operator+=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs); + +hsa_amd_profiling_dispatch_time_t& +operator-=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs); + +hsa_amd_profiling_dispatch_time_t& +operator*=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs); + +hsa_amd_profiling_async_copy_time_t& +operator+=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs); + +hsa_amd_profiling_async_copy_time_t& +operator-=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs); + +hsa_amd_profiling_async_copy_time_t& +operator*=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs); + +#if !defined(ROCPROFILER_CI_STRICT_TIMESTAMPS) +# define ROCPROFILER_CI_STRICT_TIMESTAMPS 0 +#endif + +struct profiling_time +{ + hsa_status_t status = HSA_STATUS_ERROR_INVALID_SIGNAL; + uint64_t start = 0; + uint64_t end = 0; + + profiling_time& operator+=(uint64_t offset); + profiling_time& operator-=(uint64_t offset); + profiling_time& operator*=(uint64_t scale); +}; + +inline profiling_time +adjust_profiling_time(std::string_view _label, profiling_time _value, profiling_time&& _bounds) +{ + static auto sysclock_period = hsa::get_hsa_timestamp_period(); + static auto normalize_env = common::get_env("ROCPROFILER_CI_FREQ_SCALE_TIMESTAMPS", false); + static auto strict_ts_env = common::get_env( + "ROCPROFILER_CI_STRICT_TIMESTAMPS", (ROCPROFILER_CI_STRICT_TIMESTAMPS > 0) ? 
true : false); + + // opt-in: scale the raw timestamps by the HSA timestamp period + if(ROCPROFILER_UNLIKELY(normalize_env)) _value *= sysclock_period; + + if(strict_ts_env) + { + ROCP_FATAL_IF(ROCPROFILER_UNLIKELY(_value.end < _value.start)) + << fmt::format("Invalid {} time value: {} end time ({}) is less than the {} start time " + "({}) :: difference={}", + _label, + _label, + _value.end, + _label, + _value.start, + (_value.start - _value.end)); + + ROCP_FATAL_IF(ROCPROFILER_UNLIKELY(_value.start < _bounds.start)) + << fmt::format("Invalid {} time value: {} start time ({}) is less than the {} enqueue " + "time on the CPU ({}) :: difference={}", + _label, + _label, + _value.start, + _label, + _bounds.start, + (_bounds.start - _value.start)); + + ROCP_FATAL_IF(ROCPROFILER_UNLIKELY(_value.end > _bounds.end)) + << fmt::format("Invalid {} time value: {} end time ({}) is greater than the current " + "time on the CPU in the {} handler ({}) :: difference={}", + _label, + _label, + _value.end, + _label, + _bounds.end, + (_value.end - _bounds.end)); + } + + // below are hacks for clock skew issues (each shifts start and end by the same amount): + // + // the handler's CPU timestamp bounds the end: shift earlier if the interval ends after it + if(_bounds.end < _value.end) _value -= (_value.end - _bounds.end); + + // the CPU enqueue timestamp bounds the start: shift later if the interval starts before it + if(_value.start < _bounds.start) _value += (_bounds.start - _value.start); + + return _value; +} +} // namespace tracing +} // namespace rocprofiler