clr: Increase kernelArg/managedBuffer size (#1586)
* Increase the buffer to 4 MB. This can help kernel launches that are limited by a deep kernel pipeline.

Co-authored-by: JeniferC99 <150404595+JeniferC99@users.noreply.github.com>
Этот коммит содержится в:
@@ -1801,7 +1801,7 @@ bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
|
||||
}
|
||||
hsa_agent_t agent = gpu_.dev().getBackendDevice();
|
||||
for (auto& it : pool_signal_) {
|
||||
if (HSA_STATUS_SUCCESS != Hsa::signal_create(0, 1, &agent, &it)) {
|
||||
if (HSA_STATUS_SUCCESS != Hsa::signal_create(0, 1, &agent, HSA_AMD_SIGNAL_AMD_GPU_ONLY, &it)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1810,8 +1810,7 @@ bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
|
||||
|
||||
// ================================================================================================
|
||||
address VirtualGPU::ManagedBuffer::Acquire(uint32_t size) {
|
||||
auto alignment = amd::alignUp(256u, gpu_.dev().info().globalMemCacheLineSize_);
|
||||
return Acquire(size, alignment);
|
||||
return Acquire(size, gpu_.dev().info().globalMemCacheLineSize_);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
@@ -196,7 +196,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
class ManagedBuffer : public amd::EmbeddedObject {
|
||||
public:
|
||||
//! The number of chunks the arg pool will be divided
|
||||
static constexpr uint32_t kPoolNumSignals = 4;
|
||||
static constexpr uint32_t kPoolNumSignals = 16;
|
||||
ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size)
|
||||
: gpu_(gpu), pool_size_(pool_size), pool_signal_(kPoolNumSignals) {}
|
||||
~ManagedBuffer();
|
||||
|
||||
@@ -108,7 +108,7 @@ release(uint, OPENCL_VERSION, 200, \
|
||||
"Force GPU opencl version") \
|
||||
release(bool, HSA_LOCAL_MEMORY_ENABLE, true, \
|
||||
"Enable HSA device local memory usage") \
|
||||
release(uint, HSA_KERNARG_POOL_SIZE, 1024 * 1024, \
|
||||
release(uint, HSA_KERNARG_POOL_SIZE, 4 * 1024 * 1024, \
|
||||
"Kernarg pool size") \
|
||||
release(bool, GPU_MIPMAP, true, \
|
||||
"Enables GPU mipmap extension") \
|
||||
|
||||
Ссылка в новой задаче
Block a user