Optimize roctx markers
Improve the performance of the roctx markers when the tracer is not engaged
(i.e. when the application is not running under rocprof).
The performance of roctx push/pop, measured with:
-----------------------------------------------------------------------
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 10000000; ++i) {
roctxRangePush ("A");
roctxRangePop ();
}
auto end = std::chrono::steady_clock::now();
std::cout << "ns = " << std::chrono::nanoseconds(end - start).count()
/ 10000000 << std::endl;
-----------------------------------------------------------------------
w/o rocprof | with rocprof | commit
92ns | 770ns | 0d6e132: Cleanup CallbackTable::Get
28ns | 712ns | 6421bd5: Cleanup ROCTX's implementation
20ns | 664ns | 7f0e5e5: Remove the roctx range message...
6ns | 665ns | this commit
Change-Id: Id679dcbd0fb190a3179be98a9b2c1db151efee3d
[ROCm/roctracer commit: a794247c55]
This commit is contained in:
@@ -395,7 +395,7 @@ class API_DescrParser:
|
||||
# generate API callbacks
|
||||
def gen_callbacks(self, n, name, call, struct):
|
||||
if n == -1:
|
||||
self.content += 'typedef CallbackTable<HSA_API_ID_NUMBER> cb_table_t;\n'
|
||||
self.content += 'typedef CallbackTable<ACTIVITY_DOMAIN_HSA_API, HSA_API_ID_NUMBER> cb_table_t;\n'
|
||||
self.content += 'extern cb_table_t cb_table;\n'
|
||||
self.content += '\n'
|
||||
if call != '-':
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
|
||||
#include <ext/prof_protocol.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
@@ -30,7 +32,7 @@
|
||||
namespace roctracer {
|
||||
|
||||
// Generic callbacks table
|
||||
template <uint32_t N> class CallbackTable {
|
||||
template <activity_domain_t Domain, uint32_t N> class CallbackTable {
|
||||
public:
|
||||
CallbackTable()
|
||||
// Zero initialize the callbacks array as the function pointer is used to determine if the
|
||||
@@ -40,17 +42,26 @@ template <uint32_t N> class CallbackTable {
|
||||
void Set(uint32_t callback_id, activity_rtapi_callback_t callback_function, void* user_arg) {
|
||||
assert(callback_id < N && "callback_id is out of range");
|
||||
std::lock_guard lock(mutex_);
|
||||
callbacks_[callback_id] = {callback_function, user_arg};
|
||||
auto& callback = callbacks_[callback_id];
|
||||
callback.first.store(callback_function, std::memory_order_relaxed);
|
||||
callback.second = user_arg;
|
||||
}
|
||||
|
||||
std::pair<activity_rtapi_callback_t, void*> Get(uint32_t callback_id) const {
|
||||
auto Get(uint32_t callback_id) const {
|
||||
assert(callback_id < N && "id is out of range");
|
||||
std::lock_guard lock(mutex_);
|
||||
return callbacks_[callback_id];
|
||||
auto& callback = callbacks_[callback_id];
|
||||
return std::make_pair(callback.first.load(std::memory_order_relaxed), callback.second);
|
||||
}
|
||||
|
||||
template <typename... Args> void Invoke(uint32_t callback_id, Args... args) {
|
||||
if (callbacks_[callback_id].first.load(std::memory_order_relaxed) == nullptr) return;
|
||||
if (auto [callback_function, user_arg] = Get(callback_id); callback_function != nullptr)
|
||||
callback_function(Domain, callback_id, std::forward<Args>(args)..., user_arg);
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<std::pair<activity_rtapi_callback_t, void*>, N> callbacks_;
|
||||
std::array<std::pair<std::atomic<activity_rtapi_callback_t>, void*>, N> callbacks_;
|
||||
mutable std::mutex mutex_;
|
||||
};
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ typedef enum {
|
||||
//
|
||||
namespace {
|
||||
|
||||
roctracer::CallbackTable<ROCTX_API_ID_NUMBER> callbacks;
|
||||
roctracer::CallbackTable<ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_NUMBER> callbacks;
|
||||
thread_local int range_level(0);
|
||||
|
||||
} // namespace
|
||||
@@ -75,24 +75,17 @@ PUBLIC_API uint32_t roctx_version_minor() { return ROCTX_VERSION_MINOR; }
|
||||
|
||||
PUBLIC_API void roctxMarkA(const char* message) {
|
||||
API_METHOD_PREFIX
|
||||
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxMarkA);
|
||||
api_callback_fun != nullptr) {
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxMarkA.message = message;
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxMarkA, &api_data, api_callback_arg);
|
||||
}
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxMarkA.message = message;
|
||||
callbacks.Invoke(ROCTX_API_ID_roctxMarkA, &api_data);
|
||||
API_METHOD_SUFFIX_NRET
|
||||
}
|
||||
|
||||
PUBLIC_API int roctxRangePushA(const char* message) {
|
||||
API_METHOD_PREFIX
|
||||
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangePushA);
|
||||
api_callback_fun != nullptr) {
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangePushA.message = message;
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data,
|
||||
api_callback_arg);
|
||||
}
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangePushA.message = message;
|
||||
callbacks.Invoke(ROCTX_API_ID_roctxRangePushA, &api_data);
|
||||
|
||||
return range_level++;
|
||||
API_METHOD_CATCH(-1);
|
||||
@@ -100,12 +93,9 @@ PUBLIC_API int roctxRangePushA(const char* message) {
|
||||
|
||||
PUBLIC_API int roctxRangePop() {
|
||||
API_METHOD_PREFIX
|
||||
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangePop);
|
||||
api_callback_fun != nullptr) {
|
||||
roctx_api_data_t api_data{};
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data,
|
||||
api_callback_arg);
|
||||
}
|
||||
|
||||
roctx_api_data_t api_data{};
|
||||
callbacks.Invoke(ROCTX_API_ID_roctxRangePop, &api_data);
|
||||
|
||||
if (range_level == 0) EXC_RAISING(ROCTX_STATUS_ERROR, "Pop from empty stack!");
|
||||
return --range_level;
|
||||
@@ -116,13 +106,9 @@ PUBLIC_API roctx_range_id_t roctxRangeStartA(const char* message) {
|
||||
API_METHOD_PREFIX
|
||||
static std::atomic<roctx_range_id_t> roctx_range_counter(1);
|
||||
|
||||
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangeStartA);
|
||||
api_callback_fun != nullptr) {
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangeStartA.message = message;
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStartA, &api_data,
|
||||
api_callback_arg);
|
||||
}
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangeStartA.message = message;
|
||||
callbacks.Invoke(ROCTX_API_ID_roctxRangeStartA, &api_data);
|
||||
|
||||
return roctx_range_counter++;
|
||||
API_METHOD_CATCH(-1)
|
||||
@@ -130,13 +116,9 @@ PUBLIC_API roctx_range_id_t roctxRangeStartA(const char* message) {
|
||||
|
||||
PUBLIC_API void roctxRangeStop(roctx_range_id_t rangeId) {
|
||||
API_METHOD_PREFIX
|
||||
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangeStop);
|
||||
api_callback_fun != nullptr) {
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangeStop.id = rangeId;
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStop, &api_data,
|
||||
api_callback_arg);
|
||||
}
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangeStop.id = rangeId;
|
||||
callbacks.Invoke(ROCTX_API_ID_roctxRangeStop, &api_data);
|
||||
API_METHOD_SUFFIX_NRET
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user