Improve the performance of the roctx markers when the tracer is not engaged
(i.e. the application is not running under rocprof).

The cost of one roctxRangePush/roctxRangePop pair, as the average time per
iteration in nanoseconds, was measured with:

-----------------------------------------------------------------------
  auto start = std::chrono::steady_clock::now();
  for (int i = 0; i < 10000000; ++i) {
    roctxRangePush ("A");
    roctxRangePop ();
  }
  auto end = std::chrono::steady_clock::now();
  std::cout << "ns = " << std::chrono::nanoseconds(end - start).count()
      / 10000000 << std::endl;
-----------------------------------------------------------------------

w/o rocprof | with rocprof | commit
       92ns |       770ns  | 0d6e132: Cleanup CallbackTable::Get
       28ns |       712ns  | 6421bd5: Cleanup ROCTX's implementation
       20ns |       664ns  | 7f0e5e5: Remove the roctx range message...
        6ns |       665ns  | this commit

Change-Id: Id679dcbd0fb190a3179be98a9b2c1db151efee3d
Этот коммит содержится в:
Laurent Morichetti
2022-05-06 19:46:09 -07:00
родитель 3d0198c395
Коммит a794247c55
3 изменённых файла: 33 добавления и 40 удалений
+1 -1
Просмотреть файл
@@ -395,7 +395,7 @@ class API_DescrParser:
# generate API callbacks
def gen_callbacks(self, n, name, call, struct):
if n == -1:
self.content += 'typedef CallbackTable<HSA_API_ID_NUMBER> cb_table_t;\n'
self.content += 'typedef CallbackTable<ACTIVITY_DOMAIN_HSA_API, HSA_API_ID_NUMBER> cb_table_t;\n'
self.content += 'extern cb_table_t cb_table;\n'
self.content += '\n'
if call != '-':
+16 -5
Просмотреть файл
@@ -23,6 +23,8 @@
#include <ext/prof_protocol.h>
#include <array>
#include <atomic>
#include <cassert>
#include <mutex>
#include <utility>
@@ -30,7 +32,7 @@
namespace roctracer {
// Generic callbacks table
template <uint32_t N> class CallbackTable {
template <activity_domain_t Domain, uint32_t N> class CallbackTable {
public:
CallbackTable()
// Zero initialize the callbacks array as the function pointer is used to determine if the
@@ -40,17 +42,26 @@ template <uint32_t N> class CallbackTable {
void Set(uint32_t callback_id, activity_rtapi_callback_t callback_function, void* user_arg) {
assert(callback_id < N && "callback_id is out of range");
std::lock_guard lock(mutex_);
callbacks_[callback_id] = {callback_function, user_arg};
auto& callback = callbacks_[callback_id];
callback.first.store(callback_function, std::memory_order_relaxed);
callback.second = user_arg;
}
std::pair<activity_rtapi_callback_t, void*> Get(uint32_t callback_id) const {
auto Get(uint32_t callback_id) const {
assert(callback_id < N && "id is out of range");
std::lock_guard lock(mutex_);
return callbacks_[callback_id];
auto& callback = callbacks_[callback_id];
return std::make_pair(callback.first.load(std::memory_order_relaxed), callback.second);
}
template <typename... Args> void Invoke(uint32_t callback_id, Args... args) {
if (callbacks_[callback_id].first.load(std::memory_order_relaxed) == nullptr) return;
if (auto [callback_function, user_arg] = Get(callback_id); callback_function != nullptr)
callback_function(Domain, callback_id, std::forward<Args>(args)..., user_arg);
}
private:
std::array<std::pair<activity_rtapi_callback_t, void*>, N> callbacks_;
std::array<std::pair<std::atomic<activity_rtapi_callback_t>, void*>, N> callbacks_;
mutable std::mutex mutex_;
};
+16 -34
Просмотреть файл
@@ -57,7 +57,7 @@ typedef enum {
//
namespace {
roctracer::CallbackTable<ROCTX_API_ID_NUMBER> callbacks;
roctracer::CallbackTable<ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_NUMBER> callbacks;
thread_local int range_level(0);
} // namespace
@@ -75,24 +75,17 @@ PUBLIC_API uint32_t roctx_version_minor() { return ROCTX_VERSION_MINOR; }
PUBLIC_API void roctxMarkA(const char* message) {
API_METHOD_PREFIX
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxMarkA);
api_callback_fun != nullptr) {
roctx_api_data_t api_data{};
api_data.args.roctxMarkA.message = message;
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxMarkA, &api_data, api_callback_arg);
}
roctx_api_data_t api_data{};
api_data.args.roctxMarkA.message = message;
callbacks.Invoke(ROCTX_API_ID_roctxMarkA, &api_data);
API_METHOD_SUFFIX_NRET
}
PUBLIC_API int roctxRangePushA(const char* message) {
API_METHOD_PREFIX
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangePushA);
api_callback_fun != nullptr) {
roctx_api_data_t api_data{};
api_data.args.roctxRangePushA.message = message;
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data,
api_callback_arg);
}
roctx_api_data_t api_data{};
api_data.args.roctxRangePushA.message = message;
callbacks.Invoke(ROCTX_API_ID_roctxRangePushA, &api_data);
return range_level++;
API_METHOD_CATCH(-1);
@@ -100,12 +93,9 @@ PUBLIC_API int roctxRangePushA(const char* message) {
PUBLIC_API int roctxRangePop() {
API_METHOD_PREFIX
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangePop);
api_callback_fun != nullptr) {
roctx_api_data_t api_data{};
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data,
api_callback_arg);
}
roctx_api_data_t api_data{};
callbacks.Invoke(ROCTX_API_ID_roctxRangePop, &api_data);
if (range_level == 0) EXC_RAISING(ROCTX_STATUS_ERROR, "Pop from empty stack!");
return --range_level;
@@ -116,13 +106,9 @@ PUBLIC_API roctx_range_id_t roctxRangeStartA(const char* message) {
API_METHOD_PREFIX
static std::atomic<roctx_range_id_t> roctx_range_counter(1);
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangeStartA);
api_callback_fun != nullptr) {
roctx_api_data_t api_data{};
api_data.args.roctxRangeStartA.message = message;
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStartA, &api_data,
api_callback_arg);
}
roctx_api_data_t api_data{};
api_data.args.roctxRangeStartA.message = message;
callbacks.Invoke(ROCTX_API_ID_roctxRangeStartA, &api_data);
return roctx_range_counter++;
API_METHOD_CATCH(-1)
@@ -130,13 +116,9 @@ PUBLIC_API roctx_range_id_t roctxRangeStartA(const char* message) {
PUBLIC_API void roctxRangeStop(roctx_range_id_t rangeId) {
API_METHOD_PREFIX
if (auto [api_callback_fun, api_callback_arg] = callbacks.Get(ROCTX_API_ID_roctxRangeStop);
api_callback_fun != nullptr) {
roctx_api_data_t api_data{};
api_data.args.roctxRangeStop.id = rangeId;
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStop, &api_data,
api_callback_arg);
}
roctx_api_data_t api_data{};
api_data.args.roctxRangeStop.id = rangeId;
callbacks.Invoke(ROCTX_API_ID_roctxRangeStop, &api_data);
API_METHOD_SUFFIX_NRET
}