From c34ec1e52fcb52da248c00207ebe646197ea9d3e Mon Sep 17 00:00:00 2001 From: Longlong Yao Date: Mon, 5 Jan 2026 14:58:54 +0800 Subject: [PATCH] wsl/librocdxg: Change scratch memory allocation Calculate the actual scratch memory size required based on the packet information for kernel dispatch. If the required size exceeds the total allocated memory, scratch memory must be reallocated. Otherwise, no action is needed. miopen_gtest: Full/GPU_MIOpenDriverRegressionTest_FP16.MIOpenDriverRegressionHalf/0 Signed-off-by: Longlong Yao Reviewed-by: Flora Cui Reviewed-by: Horatio Zhang --- wddm/queue.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/wddm/queue.h b/wddm/queue.h index 59da825ec2..0e936c5721 100644 --- a/wddm/queue.h +++ b/wddm/queue.h @@ -178,6 +178,10 @@ public: private: hsa_status_t KernelDispatchAqlToPm4(char *cpu, hsa_kernel_dispatch_packet_t *packet); hsa_status_t BarrierGenericAqlToPm4(char *cpu, hsa_barrier_and_packet_t *packet, bool is_or = false); + + uint64_t CalcDispatchGroups(hsa_kernel_dispatch_packet_t *packet); + uint64_t CalcDispatchWavesPerGroup(hsa_kernel_dispatch_packet_t *packet, bool wave32); + struct amd_aql_pm4_ib { uint16_t header; uint16_t ven_hdr; @@ -221,7 +225,7 @@ private: return AMD_HSA_BITS_GET(amd_queue_rocr_->queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING); } void HandleError(hsa_status_t status); - bool UpdateScratch(uint32_t private_segment_size, bool wave32); + bool UpdateScratch(hsa_kernel_dispatch_packet_t *packet, bool wave32); uint32_t UpdateIndexStride(uint32_t srd, bool wave32); @@ -247,10 +251,13 @@ private: std::condition_variable thread_cond_; static void AqlToPm4Thread(ComputeQueue *queue); - uint32_t scratch_waves_; - uint32_t scratch_size_per_wave_; - uint32_t scratch_size_; + uint64_t max_scratch_waves_; + uint64_t dispatch_waves_; + uint64_t scratch_size_per_wave_; + uint64_t scratch_size_; + uint64_t total_scratch_size_; void *scratch_base_; + uint32_t scratch_mem_alignment_size_; GpuMemoryHandle scratch_mem_; std::vector scratch_base_offset_array_;