SWDEV-374401 - Surface the amount of constant memory used by the kernel
Change-Id: I6de0e46482f27b9068582fdaee0e562f9f71f9f0
[ROCm/clr commit: 4d49204fb9]
Этот коммит содержится в:
@@ -609,6 +609,7 @@ Kernel::Kernel(const amd::Device& dev, const std::string& name, const Program& p
|
||||
workGroupInfo_.compileVecTypeHint_ = "";
|
||||
workGroupInfo_.uniformWorkGroupSize_ = false;
|
||||
workGroupInfo_.wavesPerSimdHint_ = 0;
|
||||
workGroupInfo_.constMemSize_ = 0;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
@@ -385,6 +385,7 @@ class Kernel : public amd::HeapObject {
|
||||
bool uniformWorkGroupSize_; //!< uniform work group size option
|
||||
size_t wavesPerSimdHint_; //!< waves per simd hint
|
||||
int maxOccupancyPerCu_; //!< Max occupancy per compute unit in threads
|
||||
size_t constMemSize_; //!< size of user-allocated constant memory
|
||||
};
|
||||
|
||||
//! Default constructor
|
||||
|
||||
@@ -158,6 +158,28 @@ bool LightningKernel::postLoad() {
|
||||
}
|
||||
assert(wavefront_size > 0);
|
||||
|
||||
size_t const_size_bytes = 0;
|
||||
hsa_executable_iterate_symbols(
|
||||
program()->hsaExecutable(),
|
||||
[](hsa_executable_t executable, hsa_executable_symbol_t symbol,
|
||||
void *const_size_bytes) -> hsa_status_t {
|
||||
bool variable_is_const = false;
|
||||
hsa_status_t hsaStat = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST, &variable_is_const);
|
||||
|
||||
if (hsaStat == HSA_STATUS_SUCCESS && variable_is_const) {
|
||||
uint32_t variable_size = 0;
|
||||
if (hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
|
||||
&variable_size) == HSA_STATUS_SUCCESS) {
|
||||
*(static_cast<size_t *>(const_size_bytes)) += variable_size;
|
||||
}
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&const_size_bytes);
|
||||
|
||||
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
|
||||
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
|
||||
@@ -165,6 +187,7 @@ bool LightningKernel::postLoad() {
|
||||
workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontSize_ = wavefront_size;
|
||||
workGroupInfo_.constMemSize_ = const_size_bytes;
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user