Solving timestamps and serialization merge
Change-Id: Icd14e49c06b19e4334adbdb98efed54af029b95b
[ROCm/rocprofiler commit: def7fa710a]
Цей коміт міститься в:
@@ -53,7 +53,6 @@
|
||||
#include "src/core/isa_capture/code_object_track.hpp"
|
||||
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_agent_t agent,
|
||||
@@ -487,12 +486,8 @@ hsa_status_t CodeObjectCallback(hsa_executable_t executable,
|
||||
if (data.codeobj.unload)
|
||||
codeobj_capture_instance::Unload(data.codeobj.load_base);
|
||||
else
|
||||
codeobj_capture_instance::Load(
|
||||
data.codeobj.load_base,
|
||||
uri_str,
|
||||
data.codeobj.memory_base,
|
||||
data.codeobj.memory_size
|
||||
);
|
||||
codeobj_capture_instance::Load(data.codeobj.load_base, uri_str, data.codeobj.memory_base,
|
||||
data.codeobj.memory_size);
|
||||
|
||||
hsa_executable_iterate_agent_symbols(executable, data.codeobj.agent,
|
||||
hsa_executable_iteration_callback, &(data.codeobj.unload));
|
||||
@@ -528,6 +523,37 @@ hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
|
||||
return hsasupport_singleton.GetCoreApiTable().hsa_executable_destroy_fn(executable);
|
||||
}
|
||||
|
||||
// Intercept installed in place of hsa_amd_profiling_get_dispatch_time.
//
// Looks the caller's signal up in HSASupport_Singleton::signals_timestamps:
//   * not tracked           -> forward the query unchanged to the saved runtime entry point;
//   * tracked, time cached  -> copy the cached timestamps into *time and report success;
//   * tracked, not cached   -> forward the query using the entry's new_signal instead.
//
// The map lock is held for the whole call, including the forwarded runtime
// call, exactly as the lookup requires.
// NOTE(review): holding the lock across the runtime call presumably also keeps
// new_signal alive until the async handler has cached the time - confirm before
// narrowing the critical section.
hsa_status_t GetDispatchTimestamps(hsa_agent_t agent, hsa_signal_t signal,
                                   hsa_amd_profiling_dispatch_time_t* time) {
  auto& hsa_support = rocprofiler::HSASupport_Singleton::GetInstance();
  std::lock_guard<std::mutex> guard(hsa_support.signals_timestamps_map_lock);

  const auto tracked = hsa_support.signals_timestamps.find(signal.handle);
  const bool is_tracked = tracked != hsa_support.signals_timestamps.end();

  // Fast path: the timestamps were already captured for this signal.
  if (is_tracked && tracked->second.time.has_value()) {
    *time = *tracked->second.time;
    return HSA_STATUS_SUCCESS;
  }

  // Otherwise ask the runtime, substituting the replacement signal when one is recorded.
  const hsa_signal_t query_signal = is_tracked ? tracked->second.new_signal : signal;
  return hsa_support.GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(agent, query_signal,
                                                                            time);
}
|
||||
|
||||
// Intercept installed in place of hsa_signal_destroy.
//
// Drops any signals_timestamps entry keyed by this signal's handle, then
// forwards the destroy request to the saved runtime entry point and returns
// its status. The map lock is held until the function returns.
hsa_status_t DestroySignal(hsa_signal_t signal) {
  auto& hsa_support = rocprofiler::HSASupport_Singleton::GetInstance();
  std::lock_guard<std::mutex> guard(hsa_support.signals_timestamps_map_lock);

  // Erase-by-key does nothing when the handle was never recorded.
  hsa_support.signals_timestamps.erase(signal.handle);

  return hsa_support.GetCoreApiTable().hsa_signal_destroy_fn(signal);
}
|
||||
|
||||
std::atomic<bool> profiling_async_copy_enable{false};
|
||||
|
||||
hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) {
|
||||
@@ -550,7 +576,7 @@ void MemoryASyncCopyHandler(const Tracker::entry_t* entry) {
|
||||
record.begin_ns = entry->begin;
|
||||
record.end_ns = entry->end;
|
||||
if (entry->agent.handle > 0) {
|
||||
//FIXME: Not a unique id across GPU and CPU
|
||||
// FIXME: Not a unique id across GPU and CPU
|
||||
rocprofiler::HSAAgentInfo& agent_info =
|
||||
hsasupport_singleton.GetHSAAgentInfo(entry->agent.handle);
|
||||
if (agent_info.GetType() == HSA_DEVICE_TYPE_GPU)
|
||||
@@ -735,15 +761,15 @@ HSASupport_Singleton& HSASupport_Singleton::GetInstance() {
|
||||
return *instance;
|
||||
}
|
||||
|
||||
CoreApiTable& HSASupport_Singleton::GetCoreApiTable() { return saved_core_api; }
|
||||
CoreApiTable& HSASupport_Singleton::GetCoreApiTable() { return saved_core_api; }
|
||||
|
||||
void HSASupport_Singleton::SetCoreApiTable(CoreApiTable& table) { saved_core_api = table; }
|
||||
|
||||
AmdExtTable& HSASupport_Singleton::GetAmdExtTable() { return saved_amd_ext_api; }
|
||||
AmdExtTable& HSASupport_Singleton::GetAmdExtTable() { return saved_amd_ext_api; }
|
||||
|
||||
void HSASupport_Singleton::SetAmdExtTable(AmdExtTable& table) { saved_amd_ext_api = table; }
|
||||
|
||||
hsa_ven_amd_loader_1_01_pfn_t& HSASupport_Singleton::GetHSALoaderApi() { return hsa_loader_api; }
|
||||
hsa_ven_amd_loader_1_01_pfn_t& HSASupport_Singleton::GetHSALoaderApi() { return hsa_loader_api; }
|
||||
|
||||
void HSASupport_Singleton::SetHSALoaderApi() {
|
||||
hsa_status_t status = GetCoreApiTable().hsa_system_get_major_extension_table_fn(
|
||||
@@ -822,11 +848,11 @@ void HSASupport_Singleton::FinitKsymbols() {
|
||||
}
|
||||
|
||||
|
||||
|
||||
void queues_deleter ::operator()(void* queue) const { delete static_cast<queue::Queue*>(queue); }
|
||||
|
||||
|
||||
void HSASupport_Singleton::AddQueue(hsa_queue_t* queue, std::unique_ptr<void, queues_deleter&>rocprofiler_queue) {
|
||||
void HSASupport_Singleton::AddQueue(hsa_queue_t* queue,
|
||||
std::unique_ptr<void, queues_deleter&> rocprofiler_queue) {
|
||||
std::lock_guard<std::mutex> queues_mutex_lock(queues_mutex_);
|
||||
queues.emplace(queue, std::move(rocprofiler_queue));
|
||||
}
|
||||
@@ -887,12 +913,12 @@ hsa_status_t QueueDestroyInterceptor(hsa_queue_t* hsa_queue) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) {
|
||||
|
||||
static std::map<uint64_t, MetricsDict*> metricsDicts;
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
for(auto it = hsasupport_singleton.gpu_agents.begin(); it != hsasupport_singleton.gpu_agents.end(); it++) {
|
||||
HSAAgentInfo& agent_Info = hsasupport_singleton.GetHSAAgentInfo(it->handle);
|
||||
metricsDicts.emplace(agent_Info.getHandle(), rocprofiler::MetricsDict::Create(&agent_Info)) ;
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
for (auto it = hsasupport_singleton.gpu_agents.begin();
|
||||
it != hsasupport_singleton.gpu_agents.end(); it++) {
|
||||
HSAAgentInfo& agent_Info = hsasupport_singleton.GetHSAAgentInfo(it->handle);
|
||||
metricsDicts.emplace(agent_Info.getHandle(), rocprofiler::MetricsDict::Create(&agent_Info));
|
||||
}
|
||||
uint32_t gpu_counter = 0;
|
||||
for (auto metricsDictAgent : metricsDicts) {
|
||||
@@ -964,12 +990,10 @@ bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_i
|
||||
// }
|
||||
}
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void HSASupport_Singleton::HSAInitialize(HsaApiTable* table) {
|
||||
InitKsymbols();
|
||||
// Save the HSA core api and amd_ext api.
|
||||
@@ -1045,6 +1069,13 @@ void HSASupport_Singleton::HSAInitialize(HsaApiTable* table) {
|
||||
agent_info.kernarg_pool_ = near_cpu_agent_info.kernarg_pool_;
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(
|
||||
HSASupport_Singleton::GetInstance().signals_timestamps_map_lock);
|
||||
HSASupport_Singleton::GetInstance().signals_timestamps =
|
||||
std::map<uint64_t, new_signal_timestamp_t>();
|
||||
}
|
||||
|
||||
rocprofiler::queue::CheckPacketReqiurements();
|
||||
SetHSALoaderApi();
|
||||
|
||||
@@ -1073,6 +1104,10 @@ void HSASupport_Singleton::HSAInitialize(HsaApiTable* table) {
|
||||
table->core_->hsa_executable_freeze_fn = roctracer::hsa_support::ExecutableFreezeIntercept;
|
||||
table->core_->hsa_executable_destroy_fn = roctracer::hsa_support::ExecutableDestroyIntercept;
|
||||
|
||||
table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn =
|
||||
roctracer::hsa_support::GetDispatchTimestamps;
|
||||
table->core_->hsa_signal_destroy_fn = roctracer::hsa_support::DestroySignal;
|
||||
|
||||
// Install the HSA_API wrappers
|
||||
roctracer::hsa_support::detail::InstallCoreApiWrappers(table->core_);
|
||||
roctracer::hsa_support::detail::InstallAmdExtWrappers(table->amd_ext_);
|
||||
|
||||
@@ -28,9 +28,11 @@
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
@@ -118,15 +120,20 @@ class HSAAgentInfo {
|
||||
|
||||
|
||||
struct queues_deleter {
|
||||
queues_deleter() {};
|
||||
queues_deleter(queues_deleter&) { };
|
||||
void operator() (void * queue) const;
|
||||
queues_deleter(){};
|
||||
queues_deleter(queues_deleter&){};
|
||||
void operator()(void* queue) const;
|
||||
};
|
||||
|
||||
struct new_signal_timestamp_t {
|
||||
hsa_signal_t new_signal;
|
||||
std::optional<hsa_amd_profiling_dispatch_time_t> time;
|
||||
};
|
||||
|
||||
class HSASupport_Singleton {
|
||||
private:
|
||||
HSASupport_Singleton() {};
|
||||
~HSASupport_Singleton() = delete;
|
||||
HSASupport_Singleton(){};
|
||||
~HSASupport_Singleton() = delete;
|
||||
CoreApiTable saved_core_api;
|
||||
AmdExtTable saved_amd_ext_api;
|
||||
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api;
|
||||
@@ -141,28 +148,29 @@ class HSASupport_Singleton {
|
||||
void SetHSALoaderApi();
|
||||
|
||||
public:
|
||||
std::vector<hsa_agent_t> gpu_agents;
|
||||
HSAAgentInfo& GetHSAAgentInfo(uint64_t agent_handle);
|
||||
HSAAgentInfo& GetHSAAgentInfo(Agent::DeviceInfo device_info);
|
||||
Agent::DeviceInfo& GetDeviceInfo(HSAAgentInfo* agent_info);
|
||||
std::mutex kernel_names_map_lock;
|
||||
std::map<std::string, std::vector<uint64_t>>* kernel_names;
|
||||
std::mutex ksymbol_map_lock;
|
||||
std::map<uint64_t, std::string>* ksymbols;
|
||||
void SetHSAAgentInfo(hsa_agent_t agent, HSAAgentInfo hsa_agent_info);
|
||||
static HSASupport_Singleton& GetInstance();
|
||||
CoreApiTable& GetCoreApiTable();
|
||||
AmdExtTable& GetAmdExtTable();
|
||||
hsa_ven_amd_loader_1_01_pfn_t& GetHSALoaderApi();
|
||||
void AddQueue(hsa_queue_t* queue, std::unique_ptr<void, queues_deleter&>);
|
||||
void RemoveQueue(hsa_queue_t* queue);
|
||||
void HSAInitialize(HsaApiTable* Table);
|
||||
void HSAFinalize();
|
||||
void InitKsymbols();
|
||||
void FinitKsymbols();
|
||||
HSASupport_Singleton(const HSASupport_Singleton&) = delete;
|
||||
HSASupport_Singleton& operator=(const HSASupport_Singleton&) = delete;
|
||||
|
||||
std::vector<hsa_agent_t> gpu_agents;
|
||||
HSAAgentInfo& GetHSAAgentInfo(uint64_t agent_handle);
|
||||
HSAAgentInfo& GetHSAAgentInfo(Agent::DeviceInfo device_info);
|
||||
Agent::DeviceInfo& GetDeviceInfo(HSAAgentInfo* agent_info);
|
||||
std::mutex kernel_names_map_lock;
|
||||
std::map<std::string, std::vector<uint64_t>>* kernel_names;
|
||||
std::mutex ksymbol_map_lock;
|
||||
std::map<uint64_t, std::string>* ksymbols;
|
||||
std::mutex signals_timestamps_map_lock;
|
||||
std::map<uint64_t, new_signal_timestamp_t> signals_timestamps;
|
||||
void SetHSAAgentInfo(hsa_agent_t agent, HSAAgentInfo hsa_agent_info);
|
||||
static HSASupport_Singleton& GetInstance();
|
||||
CoreApiTable& GetCoreApiTable();
|
||||
AmdExtTable& GetAmdExtTable();
|
||||
hsa_ven_amd_loader_1_01_pfn_t& GetHSALoaderApi();
|
||||
void AddQueue(hsa_queue_t* queue, std::unique_ptr<void, queues_deleter&>);
|
||||
void RemoveQueue(hsa_queue_t* queue);
|
||||
void HSAInitialize(HsaApiTable* Table);
|
||||
void HSAFinalize();
|
||||
void InitKsymbols();
|
||||
void FinitKsymbols();
|
||||
HSASupport_Singleton(const HSASupport_Singleton&) = delete;
|
||||
HSASupport_Singleton& operator=(const HSASupport_Singleton&) = delete;
|
||||
};
|
||||
|
||||
bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
@@ -460,10 +462,16 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
it = pending_signals.erase(it)) {
|
||||
auto& pending = *it;
|
||||
if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal))
|
||||
return true;
|
||||
return true;
|
||||
hsa_amd_profiling_dispatch_time_t time;
|
||||
hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
|
||||
queue_info_session->agent, pending->original_signal, &time);
|
||||
queue_info_session->agent, pending->new_signal, &time);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.signals_timestamps_map_lock);
|
||||
hsasupport_singleton.signals_timestamps[pending->original_signal.handle].time =
|
||||
std::make_optional(time);
|
||||
}
|
||||
//hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
|
||||
uint32_t record_count = 1;
|
||||
bool is_individual_xcc_mode = false;
|
||||
uint32_t xcc_count = queue_info_session->xcc_count;
|
||||
@@ -532,14 +540,13 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
delete it.second;
|
||||
}
|
||||
delete pending->context;
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
Check if the dispatch ready is empty, If so, there is no more
|
||||
dispatches to be launched and we return. Else, dispatch the
|
||||
kernel of the queue in the front of the dispatch_ready.
|
||||
*/
|
||||
profiler_serializer_t& serializer =
|
||||
|
||||
profiler_serializer_t& serializer =
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance().GetSerializer();
|
||||
std::lock_guard<std::mutex> serializer_lock(serializer.serializer_mutex);
|
||||
assert(serializer.dispatch_queue != nullptr);
|
||||
@@ -550,8 +557,13 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
Queue* queue = serializer.dispatch_ready.front();
|
||||
serializer.dispatch_ready.erase(serializer.dispatch_ready.begin());
|
||||
enable_dispatch(queue);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (pending->new_signal.handle)
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
|
||||
if (queue_info_session->interrupt_signal.handle)
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(
|
||||
queue_info_session->interrupt_signal);
|
||||
@@ -645,7 +657,10 @@ void CreateBarrierPacket(std::vector<Packet::packet_t>* transformed_packets,
|
||||
const hsa_signal_t* packet_completion_signal
|
||||
) {
|
||||
hsa_barrier_and_packet_t barrier{0};
|
||||
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
if (packet_completion_signal != nullptr) barrier.completion_signal = *packet_completion_signal;
|
||||
if (packet_dependency_signal != nullptr) barrier.dep_signal[0] = *packet_dependency_signal;
|
||||
void* barrier_ptr = &barrier;
|
||||
@@ -928,16 +943,17 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
profiles = Packet::InitializeAqlPackets(queue_info.GetCPUAgent(), queue_info.GetGPUAgent(),
|
||||
session_data, session_id_snapshot);
|
||||
replay_mode_count = profiles.size();
|
||||
|
||||
}
|
||||
|
||||
uint32_t profile_id = 0;
|
||||
// do {
|
||||
|
||||
std::pair<rocprofiler::profiling_context_t*, hsa_ven_amd_aqlprofile_profile_t*> profile;
|
||||
if (profiles.size() > 0 && replay_mode_count > 0) profile = profiles.at(profile_id);
|
||||
|
||||
hsa_signal_t ready_signal = queue_info.GetReadySignal();
|
||||
hsa_signal_t block_signal = queue_info.GetBlockSignal();
|
||||
if (profiles.size() > 0 && replay_mode_count > 0) {
|
||||
profile = profiles.at(profile_id);
|
||||
hsa_signal_t ready_signal = queue_info.GetReadySignal();
|
||||
hsa_signal_t block_signal = queue_info.GetBlockSignal();
|
||||
|
||||
/*
|
||||
Creates a barrier packet with its completion signal as the
|
||||
@@ -950,6 +966,8 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
packet waiting on it to be 0 will be blocked
|
||||
*/
|
||||
CreateBarrierPacket(&transformed_packets, &block_signal, &block_signal);
|
||||
}
|
||||
|
||||
|
||||
uint32_t writer_id = WRITER_ID.fetch_add(1, std::memory_order_release);
|
||||
|
||||
@@ -971,6 +989,7 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
uint64_t correlation_id = dispatch_packet.reserved2;
|
||||
|
||||
CreateSignal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &packet.completion_signal);
|
||||
|
||||
// Adding the dispatch packet newly created signal to the pending signals
|
||||
// list to be processed by the signal interrupt
|
||||
rocprofiler_kernel_properties_t kernel_properties =
|
||||
@@ -981,16 +1000,14 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
record_id);
|
||||
if (session_data_count > 0 && profile.second) {
|
||||
session->GetProfiler()->AddPendingSignals(
|
||||
writer_id, record_id, original_packet.completion_signal,
|
||||
dispatch_packet.completion_signal, session_id, buffer_id, profile.first,
|
||||
session_data_count, profile.second, kernel_properties, (uint32_t)syscall(__NR_gettid),
|
||||
user_pkt_index, correlation_id);
|
||||
writer_id, record_id, original_packet.completion_signal, packet.completion_signal,
|
||||
session_id, buffer_id, profile.first, session_data_count, profile.second,
|
||||
kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id);
|
||||
} else {
|
||||
session->GetProfiler()->AddPendingSignals(
|
||||
writer_id, record_id, original_packet.completion_signal,
|
||||
dispatch_packet.completion_signal, session_id, buffer_id, nullptr, session_data_count,
|
||||
nullptr, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index,
|
||||
correlation_id);
|
||||
writer_id, record_id, original_packet.completion_signal, packet.completion_signal,
|
||||
session_id, buffer_id, nullptr, session_data_count, nullptr, kernel_properties,
|
||||
(uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -998,11 +1015,22 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
// packet and create a new signal for it to get timestamps
|
||||
if (original_packet.completion_signal.handle) {
|
||||
hsa_barrier_and_packet_t barrier{};
|
||||
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
Packet::packet_t* __attribute__((__may_alias__)) pkt =
|
||||
(reinterpret_cast<Packet::packet_t*>(&barrier));
|
||||
transformed_packets.emplace_back(*pkt).completion_signal =
|
||||
original_packet.completion_signal;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(
|
||||
HSASupport_Singleton::GetInstance().signals_timestamps_map_lock);
|
||||
HSASupport_Singleton::GetInstance()
|
||||
.signals_timestamps[original_packet.completion_signal.handle] =
|
||||
new_signal_timestamp_t{packet.completion_signal, std::nullopt};
|
||||
}
|
||||
}
|
||||
|
||||
hsa_signal_t interrupt_signal{};
|
||||
@@ -1022,17 +1050,14 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
|
||||
// Added Interrupt Signal with barrier and provided handler for it
|
||||
CreateBarrierPacket( &transformed_packets, &interrupt_signal, nullptr);
|
||||
} else {
|
||||
hsa_barrier_and_packet_t barrier{};
|
||||
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
barrier.completion_signal = interrupt_signal;
|
||||
Packet::packet_t* __attribute__((__may_alias__)) pkt =
|
||||
(reinterpret_cast<Packet::packet_t*>(&barrier));
|
||||
transformed_packets.emplace_back(*pkt);
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
CreateBarrierPacket( &transformed_packets, nullptr, &interrupt_signal);
|
||||
rocprofiler::HSAAgentInfo& agentInfo =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(
|
||||
queue_info.GetGPUAgent().handle);
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(
|
||||
queue_info.GetGPUAgent().handle);
|
||||
// Creating Async Handler to be called every time the interrupt signal is
|
||||
// marked complete
|
||||
SignalAsyncHandler(
|
||||
@@ -1044,8 +1069,8 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_add(1, std::memory_order_relaxed);
|
||||
// profile_id++;
|
||||
// } while (replay_mode_count > 0 && profile_id < replay_mode_count); // Profiles loop end
|
||||
}
|
||||
|
||||
}
|
||||
/* Write the transformed packets to the hardware queue. */
|
||||
writer(&transformed_packets[0], transformed_packets.size());
|
||||
} else if (session_id_snapshot.handle > 0 && pkt_count > 0 && is_att_collection_mode && session &&
|
||||
@@ -1113,6 +1138,7 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
auto& dispatch_packet = reinterpret_cast<hsa_kernel_dispatch_packet_t&>(packet);
|
||||
|
||||
CreateSignal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &packet.completion_signal);
|
||||
|
||||
// Adding the dispatch packet newly created signal to the pending signals
|
||||
// list to be processed by the signal interrupt
|
||||
rocprofiler_kernel_properties_t kernel_properties =
|
||||
@@ -1122,9 +1148,9 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
record_id);
|
||||
|
||||
session->GetAttTracer()->AddPendingSignals(
|
||||
writer_id, record_id, original_packet.completion_signal,
|
||||
dispatch_packet.completion_signal, session_id_snapshot, buffer_id, profile,
|
||||
kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index);
|
||||
writer_id, record_id, original_packet.completion_signal, packet.completion_signal,
|
||||
session_id_snapshot, buffer_id, profile, kernel_properties,
|
||||
(uint32_t)syscall(__NR_gettid), user_pkt_index);
|
||||
|
||||
uint64_t userdata = HSASupport_Singleton::GetInstance()
|
||||
.GetHSAAgentInfo(queue_info.GetGPUAgent().handle)
|
||||
@@ -1139,6 +1165,7 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
|
||||
if (original_packet.completion_signal.handle != 0U) {
|
||||
hsa_barrier_and_packet_t barrier{};
|
||||
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
barrier.dep_signal[0] = packet.completion_signal;
|
||||
Packet::packet_t* __attribute__((__may_alias__)) pkt =
|
||||
(reinterpret_cast<Packet::packet_t*>(&barrier));
|
||||
transformed_packets.emplace_back(*pkt).completion_signal =
|
||||
|
||||
@@ -281,7 +281,7 @@ TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenEndTimeIsGreaterThenStartTi
|
||||
|
||||
for (auto& itr : current_kernel_info) {
|
||||
if (!(itr.begin_time).empty() && !(itr.end_time).empty()) {
|
||||
EXPECT_GT(itr.end_time, itr.begin_time);
|
||||
EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -347,7 +347,7 @@ TEST_F(VectorAddTest, WhenRunningProfilerWithAppThenEndTimeIsGreaterThenStartTim
|
||||
|
||||
for (auto& itr : current_kernel_info) {
|
||||
if (!(itr.begin_time).empty() && !(itr.end_time).empty()) {
|
||||
EXPECT_GT(itr.end_time, itr.begin_time);
|
||||
EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -262,6 +262,22 @@ TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelDurationShouldBePositiv
|
||||
EXPECT_GT(current_kernel_info.size(), 0);
|
||||
}
|
||||
|
||||
// Test:4 Compares end-time is greater than start-time in current
|
||||
// tracer output
|
||||
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenEndTimeIsGreaterThenStartTime) {
|
||||
// kernel info in current profiler run
|
||||
std::vector<tracer_kernel_info_t> current_kernel_info;
|
||||
|
||||
GetKernelInfoForRunningApplication(¤t_kernel_info);
|
||||
ASSERT_TRUE(current_kernel_info.size());
|
||||
|
||||
for (auto& itr : current_kernel_info) {
|
||||
if (!(itr.begin_time).empty() && !(itr.end_time).empty()) {
|
||||
EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ###################################################
|
||||
|
||||
@@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "test_utils.h"
|
||||
#include <regex>
|
||||
|
||||
namespace rocprofiler {
|
||||
namespace tests {
|
||||
@@ -130,10 +131,6 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo) {
|
||||
std::getline(tokenStream, token, ',');
|
||||
kinfo.function = token;
|
||||
std::getline(tokenStream, token, ',');
|
||||
kinfo.operation = token;
|
||||
std::getline(tokenStream, token, ',');
|
||||
kinfo.kernel_name = token;
|
||||
std::getline(tokenStream, token, ',');
|
||||
kinfo.begin_time = token;
|
||||
std::getline(tokenStream, token, ',');
|
||||
kinfo.end_time = token;
|
||||
@@ -145,6 +142,19 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo) {
|
||||
kinfo.roxtx_msg = token;
|
||||
}
|
||||
|
||||
// Get the numeric value of a timestamp token.
//
// Returns the first run of decimal digits found anywhere in `str`, parsed as
// an unsigned 64-bit integer. Returns UINT64_MAX when `str` contains no digit.
// A digit run too large for 64 bits makes std::stoull throw std::out_of_range.
uint64_t get_timestamp_value(const std::string& str) {
  // Compiled once; building a std::regex on every call is needlessly expensive.
  static const std::regex kDigits("(\\d+)");

  std::smatch match;
  if (!std::regex_search(str, match, kDigits)) {
    return UINT64_MAX;  // explicit "no number found" sentinel (same value as the former -1)
  }

  // stoull, not stoul: unsigned long is only 32 bits on some targets, which
  // cannot hold nanosecond timestamps.
  return std::stoull(match[1].str());
}
|
||||
|
||||
|
||||
} // namespace utility
|
||||
} // namespace tests
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -27,6 +27,7 @@ THE SOFTWARE.
|
||||
#include <execinfo.h> // for backtrace
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
@@ -86,6 +87,9 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo);
|
||||
// tokenize tracer output
|
||||
void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo);
|
||||
|
||||
// get numeric value of timestamp token
|
||||
uint64_t get_timestamp_value(const std::string& str);
|
||||
|
||||
} // namespace utility
|
||||
} // namespace tests
|
||||
} // namespace rocprofiler
|
||||
@@ -94,6 +98,7 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo);
|
||||
// path for executable
|
||||
int main(int argc, char** argv);
|
||||
|
||||
using rocprofiler::tests::utility::get_timestamp_value;
|
||||
using rocprofiler::tests::utility::GetNumberOfCores;
|
||||
using rocprofiler::tests::utility::GetRunningPath;
|
||||
using rocprofiler::tests::utility::is_installed_path;
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача