diff --git a/projects/rocprofiler/CHANGELOG.md b/projects/rocprofiler/CHANGELOG.md index cdb8c36d02..d2fa8d50f2 100644 --- a/projects/rocprofiler/CHANGELOG.md +++ b/projects/rocprofiler/CHANGELOG.md @@ -222,12 +222,15 @@ The resulting `a.out` will depend on ### Navi support Rocprofiler for ROCm 5.7 added support for counter collection (PMC) and advanced thread tracing (ATT) for Navi21 and Navi31 GPUs. - On Navi3x, counter collection requires the GPU to be in a stable power state. See README.md for instructions. HIP RT in ATT not yet supported. + ### Changed - ATT analysis will not run by default. For ATT to have the same behaviour as 5.5, use --plugin att --mode network - Kernel Names are now removed from HIP API records, users of the API can get the kernel names from the corresponding HIP Dispatch OPS using the correlation ID, this change was done to optimize and to manage the data copied. + ### Optimized - ATT json filesizes - Now profiler autocorrects user input errors for pmc and throws exception for wrong input with this message:"Bad input metric. usage --> pmc: [counter1] [counter2]" + ### Added - Every API trace in V2 reported synchronously will have two records, one for Enter phase and for Exit phase - File Plugin now reports the HSA OPS operation kind as part of the output text @@ -259,6 +262,8 @@ Example for file plugin output: HIP_API_DOMAIN,hipMalloc,,316678074131382,316678074136111,3 ``` - Removing Record IDs from tracer records in CLI plugin. +- Added Flush Interval and Trace Period functionality: `--flush-interval <interval>` flushes the buffers at every interval given by the user, and `--trace-period <delay>:<trace_time>` controls when tracing is active, where delay is the time to wait before starting the session, and trace_time is the time between every start and stop of the session. For more details please refer to the ROCProfV2 tool usage document. + ### Fixed - Samples are fixed to show the new usage of phases. - Plugin option validates the plugin names. 
diff --git a/projects/rocprofiler/README.md b/projects/rocprofiler/README.md index 7a906eed32..b40ad6bdd3 100644 --- a/projects/rocprofiler/README.md +++ b/projects/rocprofiler/README.md @@ -273,6 +273,20 @@ The user has two options for building: ./rocprofv2 --plugin plugin_name -i samples/input.txt -d output_dir # -d is optional, but can be used to define the directory output for output results ``` + - Flush Interval: The flush interval controls the time, in milliseconds, between buffer flushes performed by the tool. However, if the buffers become full, a flush is triggered on its own. It can be used as in the following example: + ```bash + rocprofv2 --flush-interval <interval_in_ms> + ``` + + - Trace Period: The trace period controls when profiling or tracing is enabled, using two arguments. The first one is the delay time, which is the time spent idle without tracing or profiling. The second argument is the profiling or tracing time, which is the active time during which profiling and tracing are running. The session therefore follows this timeline: + ``` + # <delay_time> => <session start> => <active_time> => <session stop> + ``` + This feature can be used with the following command: + ```bash + rocprofv2 --trace-period <delay_time>:<active_time> + ``` + - Device Profiling: A device profiling session allows the user to profile the GPU device for counters irrespective of the running applications on the GPU. This is different from application profiling. device profiling session doesn't care about the host running processes and threads. It directly provides low level profiling information. - Session Support: A session is a unique identifier for a profiling/tracing/pc-sampling task. A ROCProfilerV2 Session has enough information about what needs to be collected or traced and it allows the user to start/stop profiling/tracing whenever required. More details on the API can be found in the API specification documentation that can be installed using rocprofiler-doc package. 
Samples also can be found for how to use the API in samples directory. diff --git a/projects/rocprofiler/bin/rocprofv2 b/projects/rocprofiler/bin/rocprofv2 index 91c50a54b7..43e016b3de 100755 --- a/projects/rocprofiler/bin/rocprofv2 +++ b/projects/rocprofiler/bin/rocprofv2 @@ -38,6 +38,7 @@ usage() { echo -e "-o | --output-file For the output file name" echo -e "-d | --output-directory For adding output path where the output files will be saved" echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed" + echo -e "-tp | --trace-period For adding a trace period in milliseconds, in the following format \"-tp :\"." exit 1 } @@ -168,6 +169,16 @@ while [ 1 ] ; do fi shift shift + elif [[ "$1" = "-tp" || "$1" = "--trace-period" ]] ; then + if [ $2 ] && [[ "$2" == *":"* ]] ; then + export ROCPROFILER_TRACE_PERIOD=$2 + else + echo -e "Wrong input \"$2\" for trace period!" + usage + exit 1 + fi + shift + shift elif [ "$1" = "--hip-api" ] ; then export ROCPROFILER_HIP_API_TRACE=1 shift diff --git a/projects/rocprofiler/src/api/rocprofiler_singleton.cpp b/projects/rocprofiler/src/api/rocprofiler_singleton.cpp index 7ca9f0235d..b93af87cc2 100644 --- a/projects/rocprofiler/src/api/rocprofiler_singleton.cpp +++ b/projects/rocprofiler/src/api/rocprofiler_singleton.cpp @@ -57,34 +57,36 @@ ROCProfiler_Singleton::ROCProfiler_Singleton() : current_session_id_(rocprofiler // session map and clears them from the map. Pops labels from the range stack // and deletes the stack. 
ROCProfiler_Singleton::~ROCProfiler_Singleton() { - // { - // std::lock_guard lock(session_map_lock_); - // if (!sessions_.empty()) { - // // TODO(aelwazir): throw an exception user need to destroy all created - // // session (document) - // // fatal("Error: Sessions are not destroyed yet!"); - // sessions_.clear(); - // } - // } + { + std::lock_guard lock(session_map_lock_); + if (!sessions_.empty()) { + for (auto& session : sessions_) { + if (session.second) delete session.second; + } + sessions_.clear(); + } + } Counter::ClearBasicCounters(); } bool ROCProfiler_Singleton::FindAgent(rocprofiler_agent_id_t agent_id) { return true; } -size_t ROCProfiler_Singleton::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id) { +size_t ROCProfiler_Singleton::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, + rocprofiler_agent_id_t agent_id) { return 0; } const char* ROCProfiler_Singleton::GetAgentInfo(rocprofiler_agent_info_kind_t kind, - rocprofiler_agent_id_t agent_id) { + rocprofiler_agent_id_t agent_id) { return ""; } // TODO(aelwazir): Implement Queue Query bool ROCProfiler_Singleton::FindQueue(rocprofiler_queue_id_t queue_id) { return true; } -size_t ROCProfiler_Singleton::GetQueueInfoSize(rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id) { +size_t ROCProfiler_Singleton::GetQueueInfoSize(rocprofiler_queue_info_kind_t kind, + rocprofiler_queue_id_t queue_id) { return 0; } const char* ROCProfiler_Singleton::GetQueueInfo(rocprofiler_queue_info_kind_t kind, - rocprofiler_queue_id_t queue_id) { + rocprofiler_queue_id_t queue_id) { return ""; } @@ -93,7 +95,8 @@ bool ROCProfiler_Singleton::FindSession(rocprofiler_session_id_t session_id) { return sessions_.find(session_id.handle) != sessions_.end(); } -rocprofiler_session_id_t ROCProfiler_Singleton::CreateSession(rocprofiler_replay_mode_t replay_mode) { +rocprofiler_session_id_t ROCProfiler_Singleton::CreateSession( + rocprofiler_replay_mode_t replay_mode) { 
rocprofiler_session_id_t session_id = rocprofiler_session_id_t{GenerateUniqueSessionId()}; { std::lock_guard lock(session_map_lock_); @@ -105,22 +108,11 @@ rocprofiler_session_id_t ROCProfiler_Singleton::CreateSession(rocprofiler_replay void ROCProfiler_Singleton::DestroySession(rocprofiler_session_id_t session_id) { while (GetCurrentActiveInterruptSignalsCount() != 0) { } - - // if (GetSession(session_id)->GetTracer()) { - // GetSession(session_id)->GetTracer().reset(); - // GetSession(session_id) - // ->GetBuffer( - // GetSession(session_id) - // ->GetFilter(GetSession(session_id)->GetFilterIdWithKind(ROCPROFILER_API_TRACE)) - // .GetBufferId()) - // .reset(); - // } - { std::lock_guard lock(session_map_lock_); ASSERTM(sessions_.find(session_id.handle) != sessions_.end(), "Error: Couldn't find a created session with given id"); - delete sessions_.at(session_id.handle); + if (sessions_.at(session_id.handle)) delete sessions_.at(session_id.handle); sessions_.erase(session_id.handle); } } @@ -130,9 +122,8 @@ bool ROCProfiler_Singleton::FindDeviceProfilingSession(rocprofiler_session_id_t return dev_profiling_sessions_.find(session_id.handle) != dev_profiling_sessions_.end(); } -rocprofiler_session_id_t ROCProfiler_Singleton::CreateDeviceProfilingSession(std::vector counters, - int cpu_agent_index, - int gpu_agent_index) { +rocprofiler_session_id_t ROCProfiler_Singleton::CreateDeviceProfilingSession( + std::vector counters, int cpu_agent_index, int gpu_agent_index) { rocprofiler_session_id_t session_id; { std::lock_guard lock(device_profiling_session_map_lock_); @@ -159,7 +150,8 @@ void ROCProfiler_Singleton::DestroyDeviceProfilingSession(rocprofiler_session_id } } -DeviceProfileSession* ROCProfiler_Singleton::GetDeviceProfilingSession(rocprofiler_session_id_t session_id) { +DeviceProfileSession* ROCProfiler_Singleton::GetDeviceProfilingSession( + rocprofiler_session_id_t session_id) { std::lock_guard lock(device_profiling_session_map_lock_); 
assert(dev_profiling_sessions_.find(session_id.handle) != dev_profiling_sessions_.end() && "Error: Can't find the session!"); @@ -183,7 +175,9 @@ Session* ROCProfiler_Singleton::GetSession(rocprofiler_session_id_t session_id) } // Get Current Session ID -rocprofiler_session_id_t ROCProfiler_Singleton::GetCurrentSessionId() { return current_session_id_; } +rocprofiler_session_id_t ROCProfiler_Singleton::GetCurrentSessionId() { + return current_session_id_; +} void ROCProfiler_Singleton::SetCurrentActiveSession(rocprofiler_session_id_t session_id) { current_session_id_ = session_id; @@ -196,7 +190,7 @@ uint64_t ROCProfiler_Singleton::GetUniqueKernelDispatchId() { } size_t ROCProfiler_Singleton::GetKernelInfoSize(rocprofiler_kernel_info_kind_t kind, - rocprofiler_kernel_id_t kernel_id) { + rocprofiler_kernel_id_t kernel_id) { switch (kind) { case ROCPROFILER_KERNEL_NAME: return GetKernelNameUsingDispatchID(kernel_id.handle).size(); @@ -206,7 +200,7 @@ size_t ROCProfiler_Singleton::GetKernelInfoSize(rocprofiler_kernel_info_kind_t k } } const char* ROCProfiler_Singleton::GetKernelInfo(rocprofiler_kernel_info_kind_t kind, - rocprofiler_kernel_id_t kernel_id) { + rocprofiler_kernel_id_t kernel_id) { switch (kind) { case ROCPROFILER_KERNEL_NAME: return strdup(GetKernelNameUsingDispatchID(kernel_id.handle).c_str()); @@ -218,7 +212,7 @@ const char* ROCProfiler_Singleton::GetKernelInfo(rocprofiler_kernel_info_kind_t // TODO(aelwazir): To be implemented bool ROCProfiler_Singleton::CheckFilterData(rocprofiler_filter_kind_t filter_kind, - rocprofiler_filter_data_t filter_data) { + rocprofiler_filter_data_t filter_data) { return true; } diff --git a/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp b/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp index 92702a1159..e4c26060b2 100644 --- a/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp +++ b/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp @@ -152,17 +152,16 @@ void 
CheckPacketReqiurements(std::vector& gpu_agents) { // packets std::vector> InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, - std::vector& counter_names, bool is_spm) { + std::vector& counter_names, rocprofiler_session_id_t session_id, + bool is_spm) { hsa_status_t status = HSA_STATUS_SUCCESS; if (!counters_added.load(std::memory_order_acquire)) { for (auto& name : counter_names) { - if (rocprofiler::GetROCProfilerSingleton()->HasActiveSession()) { - rocprofiler::GetROCProfilerSingleton() - ->GetSession(rocprofiler::GetROCProfilerSingleton()->GetCurrentSessionId()) - ->GetProfiler() - ->AddCounterName(name); - } + rocprofiler::GetROCProfilerSingleton() + ->GetSession(session_id) + ->GetProfiler() + ->AddCounterName(name); } counters_added.exchange(true, std::memory_order_release); } diff --git a/projects/rocprofiler/src/core/hsa/packets/packets_generator.h b/projects/rocprofiler/src/core/hsa/packets/packets_generator.h index f7c23292c7..ad6886f6c3 100644 --- a/projects/rocprofiler/src/core/hsa/packets/packets_generator.h +++ b/projects/rocprofiler/src/core/hsa/packets/packets_generator.h @@ -41,7 +41,8 @@ typedef hsa_ext_amd_aql_pm4_packet_t packet_t; std::vector> InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, - std::vector& counter_names, bool is_spm = false); + std::vector& counter_names, rocprofiler_session_id_t session_id, + bool is_spm = false); uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_pool_t* cpu_pool); void GetCommandBufferMap(std::map); void GetOutputBufferMap(std::map); diff --git a/projects/rocprofiler/src/core/hsa/queues/queue.cpp b/projects/rocprofiler/src/core/hsa/queues/queue.cpp index 598f1d92ab..f2fa55e37d 100644 --- a/projects/rocprofiler/src/core/hsa/queues/queue.cpp +++ b/projects/rocprofiler/src/core/hsa/queues/queue.cpp @@ -366,8 +366,8 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent buffer = Packet::AllocateSysMemory(gpu_agent, 
data_size, &agent_info.cpu_pool); if (buffer == NULL) fatal("Trace data buffer allocation failed"); - auto status = - rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, data_size); + auto status = rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, + data_size); if (status != HSA_STATUS_SUCCESS) fatal("Trace data memcopy to host failed"); record->shader_engine_data[se_index].buffer_ptr = buffer; @@ -386,7 +386,8 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { !GetROCProfilerSingleton()->GetSession(queue_info_session->session_id) || !GetROCProfilerSingleton()->GetSession(queue_info_session->session_id)->GetProfiler()) return true; - rocprofiler::Session* session = GetROCProfilerSingleton()->GetSession(queue_info_session->session_id); + rocprofiler::Session* session = + GetROCProfilerSingleton()->GetSession(queue_info_session->session_id); std::lock_guard lock(session->GetSessionLock()); rocprofiler::profiler::Profiler* profiler = session->GetProfiler(); std::vector pending_signals = const_cast&>( @@ -396,7 +397,8 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { for (auto it = pending_signals.begin(); it != pending_signals.end(); it = pending_signals.erase(it)) { auto& pending = *it; - if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal)) return true; + if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal)) + return true; hsa_amd_profiling_dispatch_time_t time; hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn( queue_info_session->agent, pending->original_signal, &time); @@ -434,14 +436,14 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { pending->profile) { if (xcc_id == 0) // call to GetCounterData() is required only once for a dispatch rocprofiler::metrics::GetCounterData(pending->profile, queue_info_session->agent, - pending->context->results_list); + 
pending->context->results_list); if (is_individual_xcc_mode) rocprofiler::metrics::GetCountersAndMetricResultsByXcc( xcc_id, pending->context->results_list, pending->context->results_map, pending->context->metrics_list); else rocprofiler::metrics::GetMetricsData(pending->context->results_map, - pending->context->metrics_list); + pending->context->metrics_list); AddRecordCounters(&record, pending); } else { if (session->FindBuffer(pending->buffer_id)) { @@ -454,8 +456,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { // TODO(aelwazir): we need a better way of distributing events and free them // if (pending->profile->output_buffer.ptr) // numa_free(pending->profile->output_buffer.ptr, pending->profile->output_buffer.size); - hsa_status_t status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn( - (pending->profile->output_buffer.ptr)); + hsa_status_t status = + rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn( + (pending->profile->output_buffer.ptr)); CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status); // if (pending->profile->command_buffer.ptr) // numa_free(pending->profile->command_buffer.ptr, pending->profile->command_buffer.size); @@ -488,7 +491,8 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { !GetROCProfilerSingleton()->GetSession(queue_info_session->session_id) || !GetROCProfilerSingleton()->GetSession(queue_info_session->session_id)->GetAttTracer()) return true; - rocprofiler::Session* session = GetROCProfilerSingleton()->GetSession(queue_info_session->session_id); + rocprofiler::Session* session = + GetROCProfilerSingleton()->GetSession(queue_info_session->session_id); rocprofiler::att::AttTracer* att_tracer = session->GetAttTracer(); std::vector& pending_signals = const_cast&>( @@ -499,7 +503,8 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { it = pending_signals.erase(it)) { auto& pending = *it; 
std::lock_guard lock(session->GetSessionLock()); - if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal)) return true; + if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal)) + return true; rocprofiler_record_att_tracer_t record{}; record.kernel_id = rocprofiler_kernel_id_t{pending.kernel_descriptor}; record.gpu_id = rocprofiler_agent_id_t{(uint64_t)queue_info_session->gpu_index}; @@ -602,7 +607,7 @@ std::vector att_counters_names; rocprofiler::Session* session = nullptr; -void ResetSessionID() { session_id = rocprofiler_session_id_t{0}; } +void ResetSessionID(rocprofiler_session_id_t id) { session_id = id; } void CheckNeededProfileConfigs() { rocprofiler_session_id_t internal_session_id; @@ -670,8 +675,9 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt std::vector transformed_packets; CheckNeededProfileConfigs(); + rocprofiler_session_id_t session_id_snapshot = session_id; - if (session_id.handle > 0 && pkt_count > 0 && + if (session_id_snapshot.handle > 0 && pkt_count > 0 && (is_counter_collection_mode || is_timestamp_collection_mode || is_pc_sampling_collection_mode) && session) { @@ -691,9 +697,10 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt // +Skip kernel dispatch IDs not wanted // Skip packets other than kernel dispatch packets. 
- if (bit_extract(original_packet.header, HSA_PACKET_HEADER_TYPE, + if (session_id_snapshot.handle == 0 || + bit_extract(original_packet.header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_TYPE + HSA_PACKET_HEADER_WIDTH_TYPE - 1) != - HSA_PACKET_TYPE_KERNEL_DISPATCH) { + HSA_PACKET_TYPE_KERNEL_DISPATCH) { transformed_packets.emplace_back(packets_arr[i]); continue; } @@ -702,7 +709,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt if (session_data_count > 0 && is_counter_collection_mode) { // Get the PM4 Packets using packets_generator profiles = Packet::InitializeAqlPackets(queue_info.GetCPUAgent(), queue_info.GetGPUAgent(), - session_data); + session_data, session_id_snapshot); replay_mode_count = profiles.size(); } @@ -740,14 +747,16 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt record_id); if (profiles.size() > 0 && replay_mode_count > 0) { session->GetProfiler()->AddPendingSignals( - writer_id, record_id, original_packet.completion_signal, dispatch_packet.completion_signal, session_id, buffer_id, - profile.first, profile.first->metrics_list.size(), profile.second, kernel_properties, + writer_id, record_id, original_packet.completion_signal, + dispatch_packet.completion_signal, session_id_snapshot, buffer_id, profile.first, + profile.first->metrics_list.size(), profile.second, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id); } else { session->GetProfiler()->AddPendingSignals( - writer_id, record_id, original_packet.completion_signal, dispatch_packet.completion_signal, session_id, buffer_id, - nullptr, 0, nullptr, kernel_properties, (uint32_t)syscall(__NR_gettid), - user_pkt_index, correlation_id); + writer_id, record_id, original_packet.completion_signal, + dispatch_packet.completion_signal, session_id_snapshot, buffer_id, nullptr, 0, + nullptr, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index, + correlation_id); } } @@ -792,16 +801,16 @@ 
void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt // marked complete SignalAsyncHandler( interrupt_signal, - new queue_info_session_t{queue_info.GetGPUAgent(), session_id, queue_info.GetQueueID(), - writer_id, interrupt_signal, agentInfo.getIndex(), - agentInfo.getXccCount()}); + new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot, + queue_info.GetQueueID(), writer_id, interrupt_signal, + agentInfo.getIndex(), agentInfo.getXccCount()}); ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_add(1, std::memory_order_relaxed); // profile_id++; // } while (replay_mode_count > 0 && profile_id < replay_mode_count); // Profiles loop end } /* Write the transformed packets to the hardware queue. */ writer(&transformed_packets[0], transformed_packets.size()); - } else if (session_id.handle > 0 && pkt_count > 0 && is_att_collection_mode && session && + } else if (session_id_snapshot.handle > 0 && pkt_count > 0 && is_att_collection_mode && session && KernelInterceptCount < MAX_ATT_PROFILES) { // att start // Getting Queue Data and Information @@ -941,11 +950,13 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt record_id); if (session && profile) { session->GetAttTracer()->AddPendingSignals( - writer_id, record_id, original_packet.completion_signal, dispatch_packet.completion_signal, session_id, buffer_id, profile, + writer_id, record_id, original_packet.completion_signal, + dispatch_packet.completion_signal, session_id_snapshot, buffer_id, profile, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index); } else { session->GetAttTracer()->AddPendingSignals( - writer_id, record_id, original_packet.completion_signal, dispatch_packet.completion_signal, session_id, buffer_id, nullptr, + writer_id, record_id, original_packet.completion_signal, + dispatch_packet.completion_signal, session_id_snapshot, buffer_id, nullptr, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index); } @@ -983,8 +994,8 
@@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt // marked complete signalAsyncHandlerATT( interrupt_signal, - new queue_info_session_t{queue_info.GetGPUAgent(), session_id, queue_info.GetQueueID(), - writer_id, interrupt_signal}); + new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot, + queue_info.GetQueueID(), writer_id, interrupt_signal}); } /* Write the transformed packets to the hardware queue. */ writer(&transformed_packets[0], transformed_packets.size()); diff --git a/projects/rocprofiler/src/core/hsa/queues/queue.h b/projects/rocprofiler/src/core/hsa/queues/queue.h index fb413368a3..b4f8b96eb8 100644 --- a/projects/rocprofiler/src/core/hsa/queues/queue.h +++ b/projects/rocprofiler/src/core/hsa/queues/queue.h @@ -92,7 +92,7 @@ void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info); void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info); void CheckPacketReqiurements(std::vector& gpu_agents); -void ResetSessionID(); +void ResetSessionID(rocprofiler_session_id_t id = rocprofiler_session_id_t{0}); } // namespace queue } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/session/att/att.cpp b/projects/rocprofiler/src/core/session/att/att.cpp index d309022c2c..daa9d4664b 100644 --- a/projects/rocprofiler/src/core/session/att/att.cpp +++ b/projects/rocprofiler/src/core/session/att/att.cpp @@ -28,7 +28,6 @@ namespace att { AttTracer::AttTracer(rocprofiler_buffer_id_t buffer_id, rocprofiler_filter_id_t filter_id, rocprofiler_session_id_t session_id) : buffer_id_(buffer_id), filter_id_(filter_id), session_id_(session_id) {} -AttTracer::~AttTracer() {} void AttTracer::AddPendingSignals(uint32_t writer_id, uint64_t kernel_object, const hsa_signal_t& original_completion_signal, const hsa_signal_t& new_completion_signal, diff --git a/projects/rocprofiler/src/core/session/att/att.h b/projects/rocprofiler/src/core/session/att/att.h index 9950b3bcbe..337c841c3f 
100644 --- a/projects/rocprofiler/src/core/session/att/att.h +++ b/projects/rocprofiler/src/core/session/att/att.h @@ -49,12 +49,13 @@ namespace att { class AttTracer { public: AttTracer(rocprofiler_buffer_id_t buffer_id, rocprofiler_filter_id_t filter_id, - rocprofiler_session_id_t session_id); - ~AttTracer(); + rocprofiler_session_id_t session_id); void AddPendingSignals(uint32_t writer_id, uint64_t kernel_object, - const hsa_signal_t& original_completion_signal, const hsa_signal_t& new_completion_signal, rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id, hsa_ven_amd_aqlprofile_profile_t* profile, + const hsa_signal_t& original_completion_signal, + const hsa_signal_t& new_completion_signal, + rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id, + hsa_ven_amd_aqlprofile_profile_t* profile, rocprofiler_kernel_properties_t kernel_properties, uint32_t thread_id, uint64_t queue_index); diff --git a/projects/rocprofiler/src/core/session/session.cpp b/projects/rocprofiler/src/core/session/session.cpp index 7a7460b440..d3596b24bd 100644 --- a/projects/rocprofiler/src/core/session/session.cpp +++ b/projects/rocprofiler/src/core/session/session.cpp @@ -48,24 +48,51 @@ Session::Session(rocprofiler_replay_mode_t replay_mode, rocprofiler_session_id_t Session::~Session() { while (GetCurrentActiveInterruptSignalsCount() > 0) { } - if (profiler_started_.load(std::memory_order_release)) { - rocprofiler::queue::ResetSessionID(); - delete profiler_; - profiler_started_.exchange(false, std::memory_order_release); + { + std::lock_guard lock(session_lock_); + if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION) && spmcounter_ && + spm_started_.load(std::memory_order_release)) { + delete spmcounter_; + } + if (FindFilterWithKind(ROCPROFILER_API_TRACE) && tracer_ && + tracer_started_.load(std::memory_order_release)) { + delete tracer_; + tracer_started_.exchange(false, std::memory_order_release); + } + if 
(FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION) && pc_sampler_ && + pc_sampler_started_.load(std::memory_order_release)) { + delete pc_sampler_; + pc_sampler_started_.exchange(false, std::memory_order_release); + } + + if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER) && counters_sampler_ && + counters_sampler_started_.load(std::memory_order_release)) { + delete counters_sampler_; + counters_sampler_started_.exchange(false, std::memory_order_release); + } + if ((FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION) || + FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) && + profiler_ && profiler_started_.load(std::memory_order_release)) { + rocprofiler::queue::ResetSessionID(); + delete profiler_; + profiler_started_.exchange(false, std::memory_order_release); + } + if (FindFilterWithKind(ROCPROFILER_ATT_TRACE_COLLECTION) && att_tracer_ && + att_tracer_started_.load(std::memory_order_release)) { + delete att_tracer_; + att_tracer_started_.exchange(false, std::memory_order_release); + } + for (auto& filter : filters_) { + if (filter) delete filter; + } + filters_.clear(); + for (auto& buffer : *buffers_) { + buffer.second->Flush(); + if (buffer.second) delete buffer.second; + } + buffers_->clear(); + if (buffers_) delete buffers_; } - // if (tracer_started_.load(std::memory_order_release)) { - // delete tracer_; - // tracer_started_.exchange(false, std::memory_order_release); - // } - if (att_tracer_started_.load(std::memory_order_release)) { - delete att_tracer_; - att_tracer_started_.exchange(false, std::memory_order_release); - } - // { - // std::lock_guard lock(filters_lock_); - // buffers_.clear(); - // } - delete buffers_; } void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) { @@ -76,9 +103,6 @@ void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) { GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) ->GetBufferId() .value == buffer_id.value)) { - if 
(profiler_started_.load(std::memory_order_release)) { - // Implement Disable Profiling - } } if (FindFilterWithKind(ROCPROFILER_API_TRACE) && GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetBufferId().value == @@ -92,43 +116,50 @@ void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) { void Session::Start() { std::lock_guard lock(session_lock_); if (!is_active_) { - if (FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) { - if (profiler_started_.load(std::memory_order_release)) delete profiler_; - profiler_ = new profiler::Profiler( - GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION))->GetBufferId(), - GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION))->GetId(), - session_id_); - profiler_started_.exchange(true, std::memory_order_release); - } + if (!profiler_started_.load(std::memory_order_release)) { + if (FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) { + profiler_ = new profiler::Profiler( + GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) + ->GetBufferId(), + GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION))->GetId(), + session_id_); + profiler_started_.exchange(true, std::memory_order_release); + } - if (FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) { - if (profiler_started_.load(std::memory_order_release)) delete profiler_; - profiler_ = new profiler::Profiler( - GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION))->GetBufferId(), - GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION))->GetId(), session_id_); - profiler_started_.exchange(true, std::memory_order_release); + if (FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) { + profiler_ = new profiler::Profiler( + GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION))->GetBufferId(), + GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION))->GetId(), session_id_); + profiler_started_.exchange(true, 
std::memory_order_release); + } + } else { + rocprofiler::queue::ResetSessionID(session_id_); } if (FindFilterWithKind(ROCPROFILER_ATT_TRACE_COLLECTION)) { - if (att_tracer_started_.load(std::memory_order_release)) delete att_tracer_; - att_tracer_ = new att::AttTracer( - GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetBufferId(), - GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetId(), session_id_); - att_tracer_started_.exchange(true, std::memory_order_release); + if (!att_tracer_started_.load(std::memory_order_release)) { + att_tracer_ = new att::AttTracer( + GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetBufferId(), + GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetId(), session_id_); + att_tracer_started_.exchange(true, std::memory_order_release); + } } if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) { - if (spm_started_.load(std::memory_order_release)) delete spmcounter_; - rocprofiler_spm_parameter_t* spmparameter = - GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetSpmParameterData(); - spmcounter_ = new spm::SpmCounters( - GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetBufferId(), - GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), spmparameter, - session_id_); - if (profiler_started_.load(std::memory_order_release)) delete profiler_; - profiler_ = new profiler::Profiler( - GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetBufferId(), - GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), session_id_); - profiler_started_.exchange(true, std::memory_order_release); + if (!spm_started_.load(std::memory_order_release)) { + rocprofiler_spm_parameter_t* spmparameter = + GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetSpmParameterData(); + spmcounter_ = new spm::SpmCounters( + GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetBufferId(), + 
GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), spmparameter, + session_id_); + } + if (!profiler_started_.load(std::memory_order_release)) { + profiler_ = new profiler::Profiler( + GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetBufferId(), + GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), session_id_); + profiler_started_.exchange(true, std::memory_order_release); + } + startSpm(); } if (FindFilterWithKind(ROCPROFILER_API_TRACE)) { @@ -168,7 +199,6 @@ void Session::Start() { } is_active_ = true; - if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) startSpm(); } } @@ -179,39 +209,29 @@ void Session::Terminate() { rocprofiler::queue::ResetSessionID(); std::lock_guard lock(session_lock_); if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) { - { - stopSpm(); - delete spmcounter_; - } + { stopSpm(); } } if (FindFilterWithKind(ROCPROFILER_API_TRACE)) { std::vector domains = GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData(); if (tracer_started_.load(std::memory_order_release)) { tracer_->StopRoctracer(); - delete tracer_; - tracer_started_.exchange(false, std::memory_order_release); } } if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION)) { if (pc_sampler_started_.load(std::memory_order_release)) { pc_sampler_->Stop(); - delete pc_sampler_; - pc_sampler_started_.exchange(false, std::memory_order_release); } } if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) { if (counters_sampler_started_.load(std::memory_order_release)) { counters_sampler_->Stop(); - delete counters_sampler_; - counters_sampler_started_.exchange(false, std::memory_order_release); } } for (auto& buffer : *buffers_) { buffer.second->Flush(); - delete buffer.second; } is_active_ = false; diff --git a/projects/rocprofiler/src/core/session/spm/spm.cpp b/projects/rocprofiler/src/core/session/spm/spm.cpp index fb5d0e800b..6d58cc14a3 100644 --- a/projects/rocprofiler/src/core/session/spm/spm.cpp +++ 
b/projects/rocprofiler/src/core/session/spm/spm.cpp @@ -99,17 +99,17 @@ std::mutex processQueueLock; // if (spm_buffer_params[idx].len) { // uint32_t pidx = preIndex.load(std::memory_order_release); // if (spm_buffer_params[idx].len == spm_buffer_params[pidx].size) { -// std::cout << "Buffer completely filled with bytes" << spm_buffer_params[idx].len << std::endl; -// fd = fopen("SPM_rocprofiler_data.txt", "wb"); -// size_t retele = fwrite(spm_buffer_params[pidx].addr, 1, spm_buffer_params[idx].len, fd); -// if (retele <= 0) rocprofiler::warning("SPM Data is wrong!"); -// fclose(fd); +// std::cout << "Buffer completely filled with bytes" << spm_buffer_params[idx].len << +// std::endl; fd = fopen("SPM_rocprofiler_data.txt", "wb"); size_t retele = +// fwrite(spm_buffer_params[pidx].addr, 1, spm_buffer_params[idx].len, fd); if (retele <= 0) +// rocprofiler::warning("SPM Data is wrong!"); fclose(fd); // } else { // std::cout << "Buffer partially filled with %d bytes" << spm_buffer_params[idx].len // << std::endl; // } // if (timeout) -// if (spm_buffer_params[idx].timeout == timeout) std::cout << "Timeout occurred" << std::endl; +// if (spm_buffer_params[idx].timeout == timeout) std::cout << "Timeout occurred" << +// std::endl; // ret = ROCPROFILER_STATUS_SUCCESS; // } else { // std::cout << "Data collection failed" << std::endl; @@ -188,7 +188,8 @@ std::mutex processQueueLock; // } // se++; // } -// record.header.id = rocprofiler_record_id_t{rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}; +// record.header.id = +// rocprofiler_record_id_t{rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}; // buffer->AddRecord(record); // nSample++; // index += 160; @@ -240,10 +241,11 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) { // advance command queue const uint64_t write_idx = - rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue, 1); + 
rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue, + 1); while ((write_idx - - rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn(queue)) >= - queue->size) { + rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn( + queue)) >= queue->size) { sched_yield(); // TODO: remove } @@ -262,7 +264,7 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) { // ringdoor bell rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal, - write_idx); + write_idx); return write_idx; } @@ -272,8 +274,8 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) { // // TODO: check if API args are correct, especially UINT32_MAX // hsa_status_t status; // status = rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_create_fn( -// gpu_agent, QUEUE_NUM_PACKETS, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr, UINT32_MAX, UINT32_MAX, -// queue); +// gpu_agent, QUEUE_NUM_PACKETS, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr, UINT32_MAX, +// UINT32_MAX, queue); // if (status != HSA_STATUS_SUCCESS) rocprofiler::fatal("queue creation failed"); @@ -294,7 +296,7 @@ hsa_signal_value_t signalWait(const hsa_signal_t& signal, const hsa_signal_value if (ret_value == exp_value) break; if (ret_value != signal_value) rocprofiler::fatal("Error: signalWait: signal_value(%lu), ret_value(%lu)", signal_value, - ret_value); + ret_value); } return ret_value; } @@ -315,6 +317,7 @@ spm::SpmCounters::SpmCounters(rocprofiler_buffer_id_t buffer_id, rocprofiler_fil get_hsa_agents_list(device_list_); defaultGpuNode_ = device_list_->gpu_devices[0]; defaultCpuNode_ = device_list_->cpu_devices[0]; + delete device_list_; // create signals hsa_status_t status = @@ -333,9 +336,9 @@ rocprofiler_status_t spm::SpmCounters::startSpm() { else // else choose the default node to collect SPM preferredGpuNode_ = defaultGpuNode_; - // hsa_agent_t preferred_cpu_agent = defaultCpuNode_; - // int 
counter_count = spmparameter_->counters_count; - // Packet::packet_t start_packet; + // hsa_agent_t preferred_cpu_agent = defaultCpuNode_; + // int counter_count = spmparameter_->counters_count; + // Packet::packet_t start_packet; #if 0 hsa_status_t hsa_status = hsa_support::GetAmdExtTable().hsa_amd_spm_acquire_fn(preferredGpuNode_); if (hsa_status == HSA_STATUS_SUCCESS) { @@ -393,7 +396,7 @@ rocprofiler_status_t spm::SpmCounters::startSpm() { return ROCPROFILER_STATUS_ERROR; } #endif - return ROCPROFILER_STATUS_SUCCESS; //delete this line with if 0 + return ROCPROFILER_STATUS_SUCCESS; // delete this line with if 0 } rocprofiler_status_t spm::SpmCounters::stopSpm() { diff --git a/projects/rocprofiler/src/core/session/spm/spm.h b/projects/rocprofiler/src/core/session/spm/spm.h index 08e3d5384a..062dbd78ee 100644 --- a/projects/rocprofiler/src/core/session/spm/spm.h +++ b/projects/rocprofiler/src/core/session/spm/spm.h @@ -39,7 +39,6 @@ class SpmCounters { public: SpmCounters(rocprofiler_buffer_id_t buffer_id, rocprofiler_filter_id_t filter_id, rocprofiler_spm_parameter_t* spmparameter, rocprofiler_session_id_t session_id); - ~SpmCounters(){}; rocprofiler_status_t startSpm(); rocprofiler_status_t stopSpm(); diff --git a/projects/rocprofiler/src/tools/tool.cpp b/projects/rocprofiler/src/tools/tool.cpp index 2671a68b9d..b28b76c42e 100644 --- a/projects/rocprofiler/src/tools/tool.cpp +++ b/projects/rocprofiler/src/tools/tool.cpp @@ -78,9 +78,13 @@ static const char* amd_sys_session_id; static int shm_fd_sn = -1; struct shmd_t* shmd; -std::thread wait_for_start_shm; +uint64_t flush_interval, trace_period, trace_delay; + +std::thread wait_for_start_shm, flush_thread, trace_period_thread; std::atomic amd_sys_handler{false}; std::atomic session_created{false}; +std::atomic trace_period_thread_control{false}; +std::atomic flush_thread_control{false}; [[maybe_unused]] static rocprofiler_session_id_t session_id; static std::vector filter_ids; @@ -202,10 +206,25 @@ 
rocprofiler::TraceBuffer roctx_trace_buffer( } // namespace -uint64_t getFlushIntervalFromEnv() { +void getFlushIntervalFromEnv() { const char* path = getenv("ROCPROFILER_FLUSH_INTERVAL"); - if (path) return std::stoll(std::string(path), nullptr, 0); - return 0; + if (path) + flush_interval = std::stoll(std::string(path), nullptr, 0); + else + flush_interval = 0; +} + +void getTracePeriodFromEnv() { + const char* path = getenv("ROCPROFILER_TRACE_PERIOD"); + if (path) { + std::string str = path; + trace_period = std::stoll(str.substr(0, str.find(":")), nullptr, 0); + trace_delay = std::stoll(str.substr(str.find(":") + 1), nullptr, 0); + std::cout << "trace_period: " << trace_period << " trace_delay: " << trace_delay << std::endl; + } else { + trace_period = 0; + trace_delay = 0; + } } std::vector GetCounterNames() { @@ -338,6 +357,14 @@ att_parsed_input_t GetATTParams() { } void finish() { + if (trace_period_thread_control.load(std::memory_order_relaxed)) { + trace_period_thread_control.exchange(false, std::memory_order_release); + trace_period_thread.join(); + } + if (flush_thread_control.load(std::memory_order_relaxed)) { + flush_thread_control.exchange(false, std::memory_order_release); + flush_thread.join(); + } for ([[maybe_unused]] rocprofiler_buffer_id_t buffer_id : buffer_ids) { CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id)); } @@ -351,6 +378,9 @@ void finish() { rocprofiler::TraceBufferBase::FlushAll(); CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id)); } + if (session_id.handle > 0) { + CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id)); + } } // load plugins @@ -487,6 +517,31 @@ static int info_callback(const rocprofiler_counter_info_t info, const char* gpu_ return 1; } +void flush_interval_func() { + while (flush_thread_control.load(std::memory_order_relaxed)) { + std::this_thread::sleep_for(std::chrono::milliseconds(flush_interval)); + for ([[maybe_unused]] rocprofiler_buffer_id_t buffer_id : buffer_ids) { + 
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id)); + } + rocprofiler::TraceBufferBase::FlushAll(); + } +} + +void trace_period_func() { + while (trace_period_thread_control.load(std::memory_order_relaxed)) { + std::this_thread::sleep_for(std::chrono::milliseconds(trace_delay)); + + CHECK_ROCPROFILER(rocprofiler_start_session(session_id)); + session_created.exchange(true, std::memory_order_release); + + std::this_thread::sleep_for(std::chrono::milliseconds(trace_period)); + + session_created.exchange(false, std::memory_order_release); + rocprofiler::TraceBufferBase::FlushAll(); + CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id)); + } +} + extern "C" { // The HSA_AMD_TOOL_PRIORITY variable must be a constant value type @@ -569,6 +624,9 @@ ROCPROFILER_EXPORT bool OnLoad(void* table, uint64_t runtime_version, uint64_t f parameters.emplace_back(param); } + getFlushIntervalFromEnv(); + getTracePeriodFromEnv(); + CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id)); bool want_pc_sampling = getenv("ROCPROFILER_PC_SAMPLING"); @@ -705,7 +763,19 @@ ROCPROFILER_EXPORT bool OnLoad(void* table, uint64_t runtime_version, uint64_t f } } - if (getenv("ROCPROFILER_ENABLE_AMDSYS") == nullptr) { + // Flush buffers every given interval + if (flush_interval > 0) { + flush_thread = std::thread{flush_interval_func}; + flush_thread_control.exchange(true, std::memory_order_release); + } + + // Let session run for a given period of time + if (trace_period > 0) { + trace_period_thread = std::thread{trace_period_func}; + trace_period_thread_control.exchange(true, std::memory_order_release); + } + + if (amd_sys_session_id == nullptr && trace_period == 0) { CHECK_ROCPROFILER(rocprofiler_start_session(session_id)); session_created.exchange(true, std::memory_order_release); }