clr: Increase kernelArg/managedBuffer size (#1586)

* Increase the kernarg pool (managed buffer) size from 1 MB to 4 MB. This can help kernel launches that are limited by a deep kernel pipeline.

Co-authored-by: JeniferC99 <150404595+JeniferC99@users.noreply.github.com>
Этот коммит содержится в:
SaleelK
2025-11-08 18:32:43 -08:00
коммит произвёл GitHub
родитель 2f9017f706
Коммит 738bb19835
3 изменённых файлов: 4 добавлений и 5 удалений
+2 -3
Просмотреть файл
@@ -1801,7 +1801,7 @@ bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
}
hsa_agent_t agent = gpu_.dev().getBackendDevice();
for (auto& it : pool_signal_) {
if (HSA_STATUS_SUCCESS != Hsa::signal_create(0, 1, &agent, &it)) {
if (HSA_STATUS_SUCCESS != Hsa::signal_create(0, 1, &agent, HSA_AMD_SIGNAL_AMD_GPU_ONLY, &it)) {
return false;
}
}
@@ -1810,8 +1810,7 @@ bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
// ================================================================================================
address VirtualGPU::ManagedBuffer::Acquire(uint32_t size) {
auto alignment = amd::alignUp(256u, gpu_.dev().info().globalMemCacheLineSize_);
return Acquire(size, alignment);
return Acquire(size, gpu_.dev().info().globalMemCacheLineSize_);
}
// ================================================================================================
+1 -1
Просмотреть файл
@@ -196,7 +196,7 @@ class VirtualGPU : public device::VirtualDevice {
class ManagedBuffer : public amd::EmbeddedObject {
public:
//! The number of chunks the arg pool will be divided
static constexpr uint32_t kPoolNumSignals = 4;
static constexpr uint32_t kPoolNumSignals = 16;
ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size)
: gpu_(gpu), pool_size_(pool_size), pool_signal_(kPoolNumSignals) {}
~ManagedBuffer();
+1 -1
Просмотреть файл
@@ -108,7 +108,7 @@ release(uint, OPENCL_VERSION, 200, \
"Force GPU opencl version") \
release(bool, HSA_LOCAL_MEMORY_ENABLE, true, \
"Enable HSA device local memory usage") \
release(uint, HSA_KERNARG_POOL_SIZE, 1024 * 1024, \
release(uint, HSA_KERNARG_POOL_SIZE, 4 * 1024 * 1024, \
"Kernarg pool size") \
release(bool, GPU_MIPMAP, true, \
"Enables GPU mipmap extension") \