diff --git a/bin/rocprofv2 b/bin/rocprofv2
index 3d17578529..adcec35064 100755
--- a/bin/rocprofv2
+++ b/bin/rocprofv2
@@ -60,6 +60,8 @@ usage() {
   echo -e "\t#${GREY} usage e.g: rocprofv2 --hip-trace -fi 1000 ${RESET}\n"
   echo -e "${GREEN}-tp | --trace-period ${RESET} Specifies a trace period in milliseconds, with format \"-tp ::\"."
   echo -e "\t#${GREY} usage e.g: rocprofv2 --hip-trace -tp 1000:2000:4000 ${RESET}\n"
+  echo -e "${GREEN}-ns | --no-serialization ${RESET} For disabling serialization when running in counter-collection mode."
+  echo -e "\t#${GREY} usage e.g: rocprofv2 -i pmc.txt -ns${RESET}\n"
   exit 1
 }
 
@@ -140,6 +142,9 @@ while [ 1 ]; do
   elif [ "$1" == "--hip-api" ]; then
     export ROCPROFILER_HIP_API_TRACE=1
     shift
+  elif [[ "$1" == "-ns" || "$1" == "--no-serialization" ]]; then
+    export ROCPROFILER_NO_SERIALIZATION=1
+    shift
   elif [[ "$1" == "--hip-activity" || "$1" == "--hip-trace" ]]; then
     export ROCPROFILER_HIP_API_TRACE=1
     export ROCPROFILER_HIP_ACTIVITY_TRACE=1
diff --git a/src/core/hsa/queues/queue.cpp b/src/core/hsa/queues/queue.cpp
index 56e9edc3f9..8df3d8fb7c 100644
--- a/src/core/hsa/queues/queue.cpp
+++ b/src/core/hsa/queues/queue.cpp
@@ -386,6 +386,19 @@ void SignalAsyncReadyHandler(const hsa_signal_t& signal, void* data) {
       signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalReadyHandler, data);
   if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_signal_async_handler failed");
 }
+
+// Returns true when the ROCPROFILER_NO_SERIALIZATION environment variable
+// parses (via atol) to a value greater than zero. The variable is read once,
+// on the first call, and the cached result is returned on every later call.
+bool GetNoSerialization() {
+  static const bool no_serialization = []() {
+    const char* str = getenv("ROCPROFILER_NO_SERIALIZATION");
+    if (str != nullptr) return (atol(str) > 0);
+    return false;
+  }();
+  return no_serialization;
+}
+
 bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data)
 {
   auto queue_info_session = static_cast(data);
@@ -475,20 +488,21 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data)
     if (pending->counters_count > 0 && profile && profile->events) {
       Packet::AQLPacketProfile::MoveToCache(queue_info_session->agent,
                                             std::move(pending->profile));
-
-      profiler_serializer_t& serializer =
-          rocprofiler::ROCProfiler_Singleton::GetInstance().GetSerializer();
-      std::lock_guard serializer_lock(serializer.serializer_mutex);
-      assert(serializer.dispatch_queue != nullptr);
-      hsasupport_singleton.GetCoreApiTable().hsa_signal_store_screlease_fn(
+      if (!GetNoSerialization()) {
+        profiler_serializer_t& serializer =
+            rocprofiler::ROCProfiler_Singleton::GetInstance().GetSerializer();
+        std::lock_guard serializer_lock(serializer.serializer_mutex);
+        assert(serializer.dispatch_queue != nullptr);
+        hsasupport_singleton.GetCoreApiTable().hsa_signal_store_screlease_fn(
             queue_info_session->block_signal, 1);
-      serializer.dispatch_queue = nullptr;
-      if (!serializer.dispatch_ready.empty())
-      {
-        Queue* queue = serializer.dispatch_ready.front();
-        serializer.dispatch_ready.erase(serializer.dispatch_ready.begin());
-        enable_dispatch(queue);
-      }
+        serializer.dispatch_queue = nullptr;
+        if (!serializer.dispatch_ready.empty())
+        {
+          Queue* queue = serializer.dispatch_ready.front();
+          serializer.dispatch_ready.erase(serializer.dispatch_ready.begin());
+          enable_dispatch(queue);
+        }
+      }
     }
 
     if (pending->new_signal.handle)
@@ -518,6 +532,7 @@ void CreateSignal(uint32_t attribute, hsa_signal_t* signal) {
 
 rocprofiler_session_id_t Queue::session_id = rocprofiler_session_id_t{0};
 std::shared_mutex Queue::session_id_mutex;
+
 
 // Counter Names declaration
 std::vector session_data;
@@ -661,7 +676,7 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u
       }
     }
 
-    if (profile_packet.get())
+    if (profile_packet.get() && !GetNoSerialization())
     {
       hsa_signal_t ready_signal = queue_info.GetReadySignal();
       hsa_signal_t block_signal = queue_info.GetBlockSignal();
diff --git a/src/tools/rocprofv2/rocprofv2.cpp b/src/tools/rocprofv2/rocprofv2.cpp
index 9e04028cf2..60dabdc7b1 100644
--- a/src/tools/rocprofv2/rocprofv2.cpp
+++ b/src/tools/rocprofv2/rocprofv2.cpp
@@ -190,6 +190,13 @@ void print_usage(fs::path current_path) {
       "::\n\t\t\t");
   fmt::print(fg(fmt::color::gray),
              "usage e.g: rocprofv2 --hip-trace -tp 1000:2000:4000 [target]\n");
+
+  // no serialization
+  fmt::print(fg(fmt::color::cyan), " -ns | --no-serialization\n\t\t\t");
+  fmt::print(
+      "For disabling serialization when running in counter-collection mode\n\t\t\t");
+  fmt::print(fg(fmt::color::gray),
+             "usage e.g: rocprofv2 -i pmc.txt -ns\n");
 }
 
 // runs memory check on hip_vectoradd
@@ -510,6 +517,9 @@ int main(int argc, char** argv) {
         rocprofv2::print_usage(current_path);
         exit(EXIT_FAILURE);
       }
+      // no serialization for counter-collection mode
+    } else if (strcmp(argv[i], "-ns") == 0 || strcmp(argv[i], "--no-serialization") == 0) {
+      pathenv.emplace_back("ROCPROFILER_NO_SERIALIZATION=1");
       // wrong argument given
     } else if (argv[i][0] == '-') {
       std::cerr << "Wrong option (" << argv[i] << "), Please use the following options:\n"