From 7ba49616e95fbcfd932dda573f2cec20e825cec0 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Mon, 5 Dec 2022 17:12:13 -0800 Subject: [PATCH] SWDEV-371123 - Use barrier value packet for event records Change-Id: I5e5e5e89e0d96a2430b4682d168b76848fa5b94e [ROCm/clr commit: 4f64d89026739ed201f1cbb7de4b2d497c1ab7f0] --- projects/clr/rocclr/device/rocm/rocsettings.cpp | 9 ++++++++- projects/clr/rocclr/device/rocm/rocsettings.hpp | 3 ++- projects/clr/rocclr/device/rocm/rocvirtual.cpp | 13 +++++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp index 49a371bbcc..71c341ad43 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp @@ -102,12 +102,14 @@ Settings::Settings() { fgs_kernel_arg_ = false; // by default for asics < gfx940 old single grid sync path is followed coop_sync_ = false; + barrier_value_packet_ = false; } // ================================================================================================ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxStepping, bool enableXNACK, bool coop_groups) { customHostAllocator_ = false; + uint32_t gcnArch = gfxipMajor * 100 + gfxipMinor * 10 + gfxStepping; if (fullProfile) { pinnedXferSize_ = 0; @@ -162,6 +164,11 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor enableExtension(ClAmdFp64); } + if (gcnArch == 910) { + // Barrier Value packet is only supported on MI200 for now + barrier_value_packet_ = true; + } + if (gfxipMajor >= 10) { enableWave32Mode_ = true; enableWgpMode_ = GPU_ENABLE_WGP_MODE; @@ -175,7 +182,7 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE; } - if (gfxipMajor >= 9 && gfxipMinor >= 4 && gfxStepping >= 0) { + if (gcnArch >= 940) { coop_sync_ = true; } diff --git a/projects/clr/rocclr/device/rocm/rocsettings.hpp b/projects/clr/rocclr/device/rocm/rocsettings.hpp index df2fe39c8a..d2fffd73db 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.hpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.hpp @@ -54,7 +54,8 @@ class Settings : public device::Settings { uint skip_copy_sync_ : 1; //!< Ignore explicit HSA signal waits for copy functionality uint fgs_kernel_arg_ : 1; //!< Use fine grain kernel arg segment uint coop_sync_ : 1; //!< grid and multi-grid sync for gfx940+ - uint reserved_ : 19; + uint barrier_value_packet_ : 1; //!< Barrier value packet functionality + uint reserved_ : 18; }; uint value_; }; diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 6dd0b7b2a9..302b4b23d3 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -3214,12 +3214,21 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) { } else { profilingBegin(vcmd); if (timestamp_ != nullptr) { + const Settings& settings = dev().settings(); int32_t releaseFlags = vcmd.getEventScope(); if (releaseFlags == Device::CacheState::kCacheStateAgent) { - dispatchBarrierPacket(kBarrierPacketAgentScopeHeader, false); + if (settings.barrier_value_packet_ && vcmd.profilingInfo().marker_ts_) { + dispatchBarrierValuePacket(kBarrierPacketAgentScopeHeader); + } else { + dispatchBarrierPacket(kBarrierPacketAgentScopeHeader, false); + } } else { // Submit a barrier with a cache flushes. - dispatchBarrierPacket(kBarrierPacketHeader, false); + if (settings.barrier_value_packet_ && vcmd.profilingInfo().marker_ts_) { + dispatchBarrierValuePacket(kBarrierPacketHeader); + } else { + dispatchBarrierPacket(kBarrierPacketHeader, false); + } hasPendingDispatch_ = false; } }