diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 7567703b8c..5abfd73284 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -1801,7 +1801,7 @@ bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) { } hsa_agent_t agent = gpu_.dev().getBackendDevice(); for (auto& it : pool_signal_) { - if (HSA_STATUS_SUCCESS != Hsa::signal_create(0, 1, &agent, &it)) { + if (HSA_STATUS_SUCCESS != Hsa::signal_create(0, 1, &agent, HSA_AMD_SIGNAL_AMD_GPU_ONLY, &it)) { return false; } } @@ -1810,8 +1810,7 @@ bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) { // ================================================================================================ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size) { - auto alignment = amd::alignUp(256u, gpu_.dev().info().globalMemCacheLineSize_); - return Acquire(size, alignment); + return Acquire(size, gpu_.dev().info().globalMemCacheLineSize_); } // ================================================================================================ diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 7f23fa4a28..ce32d8fae3 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -196,7 +196,7 @@ class VirtualGPU : public device::VirtualDevice { class ManagedBuffer : public amd::EmbeddedObject { public: //! The number of chunks the arg pool will be divided - static constexpr uint32_t kPoolNumSignals = 4; + static constexpr uint32_t kPoolNumSignals = 16; ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size) : gpu_(gpu), pool_size_(pool_size), pool_signal_(kPoolNumSignals) {} ~ManagedBuffer(); diff --git a/projects/clr/rocclr/utils/flags.hpp b/projects/clr/rocclr/utils/flags.hpp index b31b3834e6..9b1179be36 100644 --- a/projects/clr/rocclr/utils/flags.hpp +++ b/projects/clr/rocclr/utils/flags.hpp @@ -108,7 +108,7 @@ release(uint, OPENCL_VERSION, 200, \ "Force GPU opencl version") \ release(bool, HSA_LOCAL_MEMORY_ENABLE, true, \ "Enable HSA device local memory usage") \ -release(uint, HSA_KERNARG_POOL_SIZE, 1024 * 1024, \ +release(uint, HSA_KERNARG_POOL_SIZE, 4 * 1024 * 1024, \ "Kernarg pool size") \ release(bool, GPU_MIPMAP, true, \ "Enables GPU mipmap extension") \