SDK: create CMake option for strict checks on CPU vs. GPU timestamps (#1159)
* SDK: create CMake option for strict checks on CPU vs. GPU timestamps
- Configuring CMake with `ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS=ON` will enable fatal errors if dispatch/memcpy timestamps on GPU are outside of the start/end time from the CPU
- `ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS` defaults to the value of `ROCPROFILER_BUILD_CI`
* Formatting
* Disable async_copy frequency scaling
* Disable profiling dispatch time frequency scaling
* Support runtime configuration via env variables
- ROCPROFILER_CI_FREQ_SCALE_TIMESTAMPS env variable will enable scaling the timestamps based on the hsa timestamp period
- ROCPROFILER_CI_STRICT_TIMESTAMPS env variable will enable strict timestamp checks
- when cmake is configured with ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS=ON, this env variable defaults to true
* ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS defaults to OFF
* Update cmake-target
* Common tracing::adjust_profiling_time
---------
Co-authored-by: Gopesh Bhardwaj <gopesh.bhardwaj@amd.com>
[ROCm/rocprofiler-sdk commit: ad48201912]
이 커밋은 다음에 포함됨:
@@ -198,6 +198,11 @@ if(ROCPROFILER_UNSAFE_NO_VERSION_CHECK)
|
||||
INTERFACE ROCPROFILER_UNSAFE_NO_VERSION_CHECK)
|
||||
endif()
|
||||
|
||||
if(ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS)
|
||||
rocprofiler_target_compile_definitions(rocprofiler-sdk-build-flags
|
||||
INTERFACE ROCPROFILER_CI_STRICT_TIMESTAMPS)
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# user customization
|
||||
#
|
||||
|
||||
@@ -38,6 +38,9 @@ rocprofiler_add_option(ROCPROFILER_BUILD_TESTS "Enable building the tests"
|
||||
${ROCPROFILER_BUILD_CI})
|
||||
rocprofiler_add_option(ROCPROFILER_BUILD_SAMPLES "Enable building the code samples"
|
||||
${ROCPROFILER_BUILD_CI})
|
||||
rocprofiler_add_option(
|
||||
ROCPROFILER_BUILD_CI_STRICT_TIMESTAMPS
|
||||
"Disable adjusting for clock skew b/t CPU and GPU timestamps" OFF ADVANCED)
|
||||
rocprofiler_add_option(ROCPROFILER_BUILD_CODECOV
|
||||
"Enable building for code coverage analysis" OFF)
|
||||
rocprofiler_add_option(ROCPROFILER_BUILD_DOCS
|
||||
|
||||
@@ -21,6 +21,8 @@
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
@@ -28,9 +30,9 @@
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/fwd.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
@@ -317,30 +319,6 @@ convert_hsa_handle(Up _hsa_object)
|
||||
return reinterpret_cast<Tp*>(_hsa_object.handle);
|
||||
}
|
||||
|
||||
hsa_amd_profiling_async_copy_time_t&
|
||||
operator+=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start += rhs;
|
||||
lhs.end += rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_async_copy_time_t&
|
||||
operator-=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start -= rhs;
|
||||
lhs.end -= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_async_copy_time_t&
|
||||
operator*=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start *= rhs;
|
||||
lhs.end *= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
bool
|
||||
async_copy_handler(hsa_signal_value_t signal_value, void* arg)
|
||||
{
|
||||
@@ -352,41 +330,38 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
|
||||
return false;
|
||||
}
|
||||
|
||||
static auto sysclock_period = hsa::get_hsa_timestamp_period();
|
||||
|
||||
auto ts = common::timestamp_ns();
|
||||
auto* _data = static_cast<async_copy_data*>(arg);
|
||||
auto copy_time = hsa_amd_profiling_async_copy_time_t{};
|
||||
auto copy_time_status = get_amd_ext_table()->hsa_amd_profiling_get_async_copy_time_fn(
|
||||
_data->rocp_signal, ©_time);
|
||||
|
||||
// normalize
|
||||
copy_time *= sysclock_period;
|
||||
auto _profile_time = tracing::profiling_time{copy_time_status, copy_time.start, copy_time.end};
|
||||
|
||||
// below is a hack for clock skew issues:
|
||||
// the timestamp of this handler for the copy will always be after when the copy ended
|
||||
if(ts < copy_time.end) copy_time -= (copy_time.end - ts);
|
||||
if(_profile_time.status == HSA_STATUS_SUCCESS)
|
||||
{
|
||||
_profile_time = tracing::adjust_profiling_time(
|
||||
"memcpy",
|
||||
_profile_time,
|
||||
tracing::profiling_time{HSA_STATUS_SUCCESS, _data->start_ts, ts});
|
||||
|
||||
// below is a hack for clock skew issues:
|
||||
// the timestamp of the function call triggering the copy will always be before when the copy
|
||||
// started
|
||||
if(copy_time.start < _data->start_ts) copy_time += (_data->start_ts - copy_time.start);
|
||||
|
||||
// if we encounter this in CI, it will cause test to fail
|
||||
ROCP_CI_LOG_IF(ERROR, copy_time_status == HSA_STATUS_SUCCESS && copy_time.end < copy_time.start)
|
||||
<< "hsa_amd_profiling_get_async_copy_time for returned async times where the end time ("
|
||||
<< copy_time.end << ") was less than the start time (" << copy_time.start << ")";
|
||||
// if we encounter this in CI, it will cause test to fail
|
||||
ROCP_CI_LOG_IF(ERROR, _profile_time.end < _profile_time.start)
|
||||
<< "hsa_amd_profiling_get_async_copy_time for returned async times where the end time ("
|
||||
<< _profile_time.end << ") was less than the start time (" << _profile_time.start
|
||||
<< ")";
|
||||
}
|
||||
|
||||
// get the contexts that were active when the signal was created
|
||||
const auto& tracing_data = _data->tracing_data;
|
||||
// we need to decrement this reference count at the end of the functions
|
||||
auto* _corr_id = _data->correlation_id;
|
||||
|
||||
if(copy_time_status == HSA_STATUS_SUCCESS && !tracing_data.empty())
|
||||
if(_profile_time.status == HSA_STATUS_SUCCESS && !tracing_data.empty())
|
||||
{
|
||||
if(!_data->tracing_data.callback_contexts.empty())
|
||||
{
|
||||
auto _tracer_data = _data->get_callback_data(copy_time.start, copy_time.end);
|
||||
auto _tracer_data = _data->get_callback_data(_profile_time.start, _profile_time.end);
|
||||
|
||||
tracing::execute_phase_exit_callbacks(_data->tracing_data.callback_contexts,
|
||||
_data->tracing_data.external_correlation_ids,
|
||||
@@ -397,7 +372,8 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
|
||||
|
||||
if(!_data->tracing_data.buffered_contexts.empty())
|
||||
{
|
||||
auto record = _data->get_buffered_record(nullptr, copy_time.start, copy_time.end);
|
||||
auto record =
|
||||
_data->get_buffered_record(nullptr, _profile_time.start, _profile_time.end);
|
||||
|
||||
tracing::execute_buffer_record_emplace(_data->tracing_data.buffered_contexts,
|
||||
_data->tid,
|
||||
|
||||
+17
-73
@@ -21,10 +21,13 @@
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp"
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
@@ -36,93 +39,35 @@ namespace rocprofiler
|
||||
{
|
||||
namespace kernel_dispatch
|
||||
{
|
||||
namespace
|
||||
{
|
||||
hsa_amd_profiling_dispatch_time_t&
|
||||
operator+=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start += rhs;
|
||||
lhs.end += rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_dispatch_time_t&
|
||||
operator-=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start -= rhs;
|
||||
lhs.end -= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_dispatch_time_t&
|
||||
operator*=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start *= rhs;
|
||||
lhs.end *= rhs;
|
||||
return lhs;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
profiling_time&
|
||||
profiling_time::operator+=(uint64_t offset)
|
||||
{
|
||||
start += offset;
|
||||
end += offset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
profiling_time&
|
||||
profiling_time::operator-=(uint64_t offset)
|
||||
{
|
||||
start -= offset;
|
||||
end -= offset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
profiling_time&
|
||||
profiling_time::operator*=(uint64_t scale)
|
||||
{
|
||||
start *= scale;
|
||||
end *= scale;
|
||||
return *this;
|
||||
}
|
||||
|
||||
profiling_time
|
||||
get_dispatch_time(hsa_agent_t _hsa_agent,
|
||||
hsa_signal_t _signal,
|
||||
rocprofiler_kernel_id_t _kernel_id,
|
||||
std::optional<uint64_t> _baseline)
|
||||
{
|
||||
static auto sysclock_period = hsa::get_hsa_timestamp_period();
|
||||
|
||||
auto ts = common::timestamp_ns();
|
||||
auto dispatch_time = hsa_amd_profiling_dispatch_time_t{};
|
||||
auto dispatch_time_status = hsa::get_amd_ext_table()->hsa_amd_profiling_get_dispatch_time_fn(
|
||||
_hsa_agent, _signal, &dispatch_time);
|
||||
|
||||
if(dispatch_time_status == HSA_STATUS_SUCCESS)
|
||||
auto _profile_time =
|
||||
tracing::profiling_time{dispatch_time_status, dispatch_time.start, dispatch_time.end};
|
||||
|
||||
if(_profile_time.status == HSA_STATUS_SUCCESS)
|
||||
{
|
||||
// if we encounter this in CI, it will cause test to fail
|
||||
ROCP_CI_LOG_IF(ERROR, dispatch_time.end < dispatch_time.start)
|
||||
ROCP_CI_LOG_IF(ERROR, _profile_time.end < _profile_time.start)
|
||||
<< "hsa_amd_profiling_get_dispatch_time for kernel_id=" << _kernel_id
|
||||
<< " on rocprofiler_agent="
|
||||
<< CHECK_NOTNULL(agent::get_rocprofiler_agent(_hsa_agent))->id.handle
|
||||
<< " returned dispatch times where the end time (" << dispatch_time.end
|
||||
<< ") was less than the start time (" << dispatch_time.start << ")";
|
||||
<< CHECK_NOTNULL(agent::get_rocprofiler_agent(_hsa_agent))->node_id
|
||||
<< " returned dispatch times where the end time (" << _profile_time.end
|
||||
<< ") was less than the start time (" << _profile_time.start << ")";
|
||||
|
||||
// normalize
|
||||
dispatch_time *= sysclock_period;
|
||||
|
||||
// below is a hack for clock skew issues:
|
||||
// the timestamp of this handler for the kernel dispatch will always be after when the
|
||||
// kernel completed
|
||||
if(ts < dispatch_time.end) dispatch_time -= (dispatch_time.end - ts);
|
||||
|
||||
// below is a hack for clock skew issues:
|
||||
// the timestamp of the packet rewriter for the kernel packet will always be before when the
|
||||
// kernel started
|
||||
if(_baseline && dispatch_time.start < *_baseline)
|
||||
dispatch_time += (*_baseline - dispatch_time.start);
|
||||
_profile_time = tracing::adjust_profiling_time(
|
||||
"dispatch",
|
||||
_profile_time,
|
||||
tracing::profiling_time{
|
||||
HSA_STATUS_SUCCESS, _baseline.value_or(dispatch_time.start), ts});
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -133,8 +78,7 @@ get_dispatch_time(hsa_agent_t _hsa_agent,
|
||||
<< " :: " << hsa::get_hsa_status_string(dispatch_time_status);
|
||||
}
|
||||
|
||||
return profiling_time{
|
||||
.status = dispatch_time_status, .start = dispatch_time.start, .end = dispatch_time.end};
|
||||
return _profile_time;
|
||||
}
|
||||
} // namespace kernel_dispatch
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
|
||||
@@ -34,16 +36,7 @@ namespace rocprofiler
|
||||
{
|
||||
namespace kernel_dispatch
|
||||
{
|
||||
struct profiling_time
|
||||
{
|
||||
hsa_status_t status = HSA_STATUS_ERROR_INVALID_SIGNAL;
|
||||
uint64_t start = 0;
|
||||
uint64_t end = 0;
|
||||
|
||||
profiling_time& operator+=(uint64_t offset);
|
||||
profiling_time& operator-=(uint64_t offset);
|
||||
profiling_time& operator*=(uint64_t scale);
|
||||
};
|
||||
using profiling_time = tracing::profiling_time;
|
||||
|
||||
// get the profiling time for a signal on an agent, if start time is less than baseline, correct to
|
||||
// start at baseline. If kernel_id is provided, it will be included in error log message if there is
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "lib/rocprofiler-sdk/hsa/queue_info_session.hpp"
|
||||
// #include "lib/rocprofiler-sdk/kernel_dispatch/profiling_time.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
@@ -45,7 +45,7 @@ using context_t = context::context;
|
||||
using user_data_map_t = std::unordered_map<const context_t*, rocprofiler_user_data_t>;
|
||||
using external_corr_id_map_t = user_data_map_t;
|
||||
|
||||
struct profiling_time;
|
||||
using profiling_time = tracing::profiling_time;
|
||||
|
||||
profiling_time
|
||||
get_dispatch_time(const hsa::queue_info_session& session);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#
|
||||
set(ROCPROFILER_LIB_TRACING_SOURCES)
|
||||
set(ROCPROFILER_LIB_TRACING_HEADERS fwd.hpp tracing.hpp)
|
||||
set(ROCPROFILER_LIB_TRACING_SOURCES profiling_time.cpp)
|
||||
set(ROCPROFILER_LIB_TRACING_HEADERS fwd.hpp profiling_time.hpp tracing.hpp)
|
||||
|
||||
target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_TRACING_SOURCES}
|
||||
${ROCPROFILER_LIB_TRACING_HEADERS})
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/tracing/profiling_time.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tracing
|
||||
{
|
||||
hsa_amd_profiling_dispatch_time_t&
|
||||
operator+=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start += rhs;
|
||||
lhs.end += rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_dispatch_time_t&
|
||||
operator-=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start -= rhs;
|
||||
lhs.end -= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_dispatch_time_t&
|
||||
operator*=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start *= rhs;
|
||||
lhs.end *= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_async_copy_time_t&
|
||||
operator+=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start += rhs;
|
||||
lhs.end += rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_async_copy_time_t&
|
||||
operator-=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start -= rhs;
|
||||
lhs.end -= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
hsa_amd_profiling_async_copy_time_t&
|
||||
operator*=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs)
|
||||
{
|
||||
lhs.start *= rhs;
|
||||
lhs.end *= rhs;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
profiling_time&
|
||||
profiling_time::operator+=(uint64_t offset)
|
||||
{
|
||||
start += offset;
|
||||
end += offset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
profiling_time&
|
||||
profiling_time::operator-=(uint64_t offset)
|
||||
{
|
||||
start -= offset;
|
||||
end -= offset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
profiling_time&
|
||||
profiling_time::operator*=(uint64_t scale)
|
||||
{
|
||||
start *= scale;
|
||||
end *= scale;
|
||||
return *this;
|
||||
}
|
||||
} // namespace tracing
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,129 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tracing
|
||||
{
|
||||
// Compound-assignment helpers for the HSA profiling time structs. Each one applies the
// operation with rhs to both the start and the end field of lhs and returns lhs.

// dispatch time: add rhs to start and end
hsa_amd_profiling_dispatch_time_t&
|
||||
operator+=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs);
|
||||
|
||||
// dispatch time: subtract rhs from start and end
hsa_amd_profiling_dispatch_time_t&
|
||||
operator-=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs);
|
||||
|
||||
// dispatch time: multiply start and end by rhs
hsa_amd_profiling_dispatch_time_t&
|
||||
operator*=(hsa_amd_profiling_dispatch_time_t& lhs, uint64_t rhs);
|
||||
|
||||
// async copy time: add rhs to start and end
hsa_amd_profiling_async_copy_time_t&
|
||||
operator+=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs);
|
||||
|
||||
// async copy time: subtract rhs from start and end
hsa_amd_profiling_async_copy_time_t&
|
||||
operator-=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs);
|
||||
|
||||
// async copy time: multiply start and end by rhs
hsa_amd_profiling_async_copy_time_t&
|
||||
operator*=(hsa_amd_profiling_async_copy_time_t& lhs, uint64_t rhs);
|
||||
|
||||
// Fallback for builds that do not define ROCPROFILER_CI_STRICT_TIMESTAMPS: with the value 0,
// the ROCPROFILER_CI_STRICT_TIMESTAMPS environment setting read by adjust_profiling_time
// defaults to false.
#if !defined(ROCPROFILER_CI_STRICT_TIMESTAMPS)
|
||||
# define ROCPROFILER_CI_STRICT_TIMESTAMPS 0
|
||||
#endif
|
||||
|
||||
struct profiling_time
|
||||
{
|
||||
hsa_status_t status = HSA_STATUS_ERROR_INVALID_SIGNAL;
|
||||
uint64_t start = 0;
|
||||
uint64_t end = 0;
|
||||
|
||||
profiling_time& operator+=(uint64_t offset);
|
||||
profiling_time& operator-=(uint64_t offset);
|
||||
profiling_time& operator*=(uint64_t scale);
|
||||
};
|
||||
|
||||
/// Optionally validate a profiling interval against CPU-side bounds, then shift it into them.
///
/// @param _label  Name of the operation being timed (e.g. "memcpy" or "dispatch"); only used
///                to build the fatal-error messages.
/// @param _value  Interval to adjust; taken by value and returned after adjustment.
/// @param _bounds CPU-side interval: `start` is the enqueue time on the CPU and `end` is the
///                CPU time at which the result is being collected.
/// @return `_value`, scaled by the HSA timestamp period when
///         ROCPROFILER_CI_FREQ_SCALE_TIMESTAMPS is enabled, then shifted back if it ends
///         after `_bounds.end` and finally shifted forward if it starts before
///         `_bounds.start`.
///
/// When ROCPROFILER_CI_STRICT_TIMESTAMPS is enabled (its default comes from the
/// ROCPROFILER_CI_STRICT_TIMESTAMPS compile definition), any interval that would need such a
/// shift, or whose end precedes its start, triggers ROCP_FATAL_IF instead.
inline profiling_time
adjust_profiling_time(std::string_view _label, profiling_time _value, profiling_time&& _bounds)
{
    // function-local statics: each setting is queried once, on the first call
    static auto sysclock_period = hsa::get_hsa_timestamp_period();
    static auto normalize_env   = common::get_env("ROCPROFILER_CI_FREQ_SCALE_TIMESTAMPS", false);
    static auto strict_ts_env =
        common::get_env("ROCPROFILER_CI_STRICT_TIMESTAMPS", ROCPROFILER_CI_STRICT_TIMESTAMPS > 0);

    // normalize
    if(ROCPROFILER_UNLIKELY(normalize_env)) _value *= sysclock_period;

    if(strict_ts_env)
    {
        // The timestamps are unsigned, so each reported difference is written as
        // (larger - smaller) for the failing condition; the other order would wrap around.
        ROCP_FATAL_IF(ROCPROFILER_UNLIKELY(_value.end < _value.start))
            << fmt::format("Invalid {} time value: {} end time ({}) is less than the {} start time "
                           "({}) :: difference={}",
                           _label,
                           _label,
                           _value.end,
                           _label,
                           _value.start,
                           (_value.start - _value.end));

        // five placeholders -> five arguments (label, label, value, CPU bound, difference)
        ROCP_FATAL_IF(ROCPROFILER_UNLIKELY(_value.start < _bounds.start))
            << fmt::format("Invalid {} time value: {} start time ({}) is less than the enqueue "
                           "time on the CPU ({}) :: difference={}",
                           _label,
                           _label,
                           _value.start,
                           _bounds.start,
                           (_bounds.start - _value.start));

        // five placeholders -> five arguments (label, label, value, CPU bound, difference)
        ROCP_FATAL_IF(ROCPROFILER_UNLIKELY(_value.end > _bounds.end))
            << fmt::format("Invalid {} time value: {} end time ({}) is greater than the current "
                           "time on the CPU ({}) :: difference={}",
                           _label,
                           _label,
                           _value.end,
                           _bounds.end,
                           (_value.end - _bounds.end));
    }

    // below are hacks for clock skew issues:
    //
    // the timestamp of this handler will always be after when the profiling time ended
    if(_bounds.end < _value.end) _value -= (_value.end - _bounds.end);

    // the timestamp of the enqueue will always be before when the profiling time started
    if(_value.start < _bounds.start) _value += (_bounds.start - _value.start);

    return _value;
}
|
||||
} // namespace tracing
|
||||
} // namespace rocprofiler
|
||||
새 이슈에서 참조
사용자 차단