From ab81da65443700154cb5ee51bf88a8671c98ea6e Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras Date: Fri, 6 Jan 2023 01:38:22 +0000 Subject: [PATCH] SWDEV-374401 - Surface the amount of used constant memory by kernel Change-Id: I6de0e46482f27b9068582fdaee0e562f9f71f9f0 [ROCm/clr commit: 4d49204fb9a862f5ceda3cba52784799b4207626] --- projects/clr/rocclr/device/devkernel.cpp | 1 + projects/clr/rocclr/device/devkernel.hpp | 1 + projects/clr/rocclr/device/rocm/rockernel.cpp | 23 +++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index 914811fe70..2313084bef 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -609,6 +609,7 @@ Kernel::Kernel(const amd::Device& dev, const std::string& name, const Program& p workGroupInfo_.compileVecTypeHint_ = ""; workGroupInfo_.uniformWorkGroupSize_ = false; workGroupInfo_.wavesPerSimdHint_ = 0; + workGroupInfo_.constMemSize_ = 0; } // ================================================================================================ diff --git a/projects/clr/rocclr/device/devkernel.hpp b/projects/clr/rocclr/device/devkernel.hpp index b256abf3f8..f2ad0df404 100644 --- a/projects/clr/rocclr/device/devkernel.hpp +++ b/projects/clr/rocclr/device/devkernel.hpp @@ -385,6 +385,7 @@ class Kernel : public amd::HeapObject { bool uniformWorkGroupSize_; //!< uniform work group size option size_t wavesPerSimdHint_; //!< waves per simd hit int maxOccupancyPerCu_; //!< Max occupancy per compute unit in threads + size_t constMemSize_; //!< size of user-allocated constant memory }; //! 
Default constructor diff --git a/projects/clr/rocclr/device/rocm/rockernel.cpp b/projects/clr/rocclr/device/rocm/rockernel.cpp index 5624c87757..110625f29b 100644 --- a/projects/clr/rocclr/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/device/rocm/rockernel.cpp @@ -158,6 +158,28 @@ bool LightningKernel::postLoad() { } assert(wavefront_size > 0); + size_t const_size_bytes = 0; + hsa_executable_iterate_symbols( + program()->hsaExecutable(), + [](hsa_executable_t executable, hsa_executable_symbol_t symbol, + void *const_size_bytes) -> hsa_status_t { + bool variable_is_const = false; + hsa_status_t hsaStat = hsa_executable_symbol_get_info( + symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST, &variable_is_const); + + if (hsaStat == HSA_STATUS_SUCCESS && variable_is_const) { + uint32_t variable_size = 0; + if (hsa_executable_symbol_get_info( + symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, + &variable_size) == HSA_STATUS_SUCCESS) { + *(static_cast<size_t*>(const_size_bytes)) += variable_size; + } + } + + return HSA_STATUS_SUCCESS; + }, + &const_size_bytes); + workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_; workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_; workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_; @@ -165,6 +187,7 @@ bool LightningKernel::postLoad() { workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_; workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size; workGroupInfo_.wavefrontSize_ = wavefront_size; + workGroupInfo_.constMemSize_ = const_size_bytes; if (workGroupInfo_.size_ == 0) { return false; }