diff --git a/projects/rocprofiler-sdk/samples/counter_collection/CMakeLists.txt b/projects/rocprofiler-sdk/samples/counter_collection/CMakeLists.txt index 3dd1a434e0..4e80703f07 100644 --- a/projects/rocprofiler-sdk/samples/counter_collection/CMakeLists.txt +++ b/projects/rocprofiler-sdk/samples/counter_collection/CMakeLists.txt @@ -47,7 +47,7 @@ set_tests_properties( counter-collection PROPERTIES TIMEOUT - 45 + 300 LABELS "samples" ENVIRONMENT diff --git a/projects/rocprofiler-sdk/samples/counter_collection/main.cpp b/projects/rocprofiler-sdk/samples/counter_collection/main.cpp index 1a456e92ce..596d77c28c 100644 --- a/projects/rocprofiler-sdk/samples/counter_collection/main.cpp +++ b/projects/rocprofiler-sdk/samples/counter_collection/main.cpp @@ -40,7 +40,7 @@ kernelC(T* C_d, const T* A_d, size_t N) void launchKernals() { - const int NUM_LAUNCH = 1000; + const int NUM_LAUNCH = 200000; // Normal HIP Calls int* gpuMem; [[maybe_unused]] hipDeviceProp_t devProp; diff --git a/projects/rocprofiler-sdk/source/lib/common/utility.hpp b/projects/rocprofiler-sdk/source/lib/common/utility.hpp index 20361086f5..5d28f50e83 100644 --- a/projects/rocprofiler-sdk/source/lib/common/utility.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/utility.hpp @@ -25,7 +25,9 @@ #include #include #include +#include #include +#include #include #include @@ -97,5 +99,57 @@ private: L _destroy_func; }; +/** + * Limits the number of active items to those set in capacity. + * If capacity is reached, will block until another caller + * removes active capacity. + */ +class active_capacity_gate +{ +public: + active_capacity_gate(size_t capacity) + : _capacity(capacity) + {} + void add_active(size_t size) + { + if(size >= _capacity) + { + throw std::runtime_error("Size exceeds gate capacity"); + } + + std::unique_lock lock(_m); + if(_count + size < _capacity) + { + _count += size; + return; + } + _waiters++; + _cv.wait(lock, [&]() { return _count + size < _capacity; }); + _waiters--; + _count += size; + } + + void remove_active(size_t size) + { + std::unique_lock lock(_m); + if(_count > size) + _count -= size; + else + _count = 0; + + if(_waiters > 0) + { + _cv.notify_all(); + } + } + +private: + size_t _count{0}; + size_t _capacity{0}; + size_t _waiters{0}; + std::mutex _m; + std::condition_variable _cv; +}; + } // namespace common } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp index afafd48a81..d2534353f3 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp @@ -31,6 +31,15 @@ namespace hsa { namespace { +common::active_capacity_gate& +signal_limiter() +{ + // Limit the maximun number of HSA signals created. + // There is a hard limit to the maximum that can exist. + static common::active_capacity_gate _gate(1024); + return _gate; +} + bool AsyncSignalHandler(hsa_signal_value_t, void* data) { @@ -59,15 +68,24 @@ AsyncSignalHandler(hsa_signal_value_t, void* data) } }); + size_t signals_to_remove = 0; // Delete signals and packets, signal we have completed. if(queue_info_session.interrupt_signal.handle != 0u) + { + signals_to_remove++; queue_info_session.queue.core_api().hsa_signal_destroy_fn( queue_info_session.interrupt_signal); + } if(queue_info_session.kernel_pkt.completion_signal.handle != 0u) { + signals_to_remove++; queue_info_session.queue.core_api().hsa_signal_destroy_fn( queue_info_session.kernel_pkt.completion_signal); } + if(signals_to_remove > 0) + { + signal_limiter().remove_active(signals_to_remove); + } queue_info_session.queue.async_complete(); delete static_cast(data); @@ -267,6 +285,7 @@ Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_sessio void Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const { + signal_limiter().add_active(1); hsa_status_t status = _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal); LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) << "Error: hsa_amd_signal_create failed";