From 02b38d06142caeac79eb28c8a12bc2f17e9de7b6 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 5 Mar 2025 19:14:02 +0000 Subject: [PATCH] rocr: Put back scratch_backing_memory_byte_size The scratch_backing_memory_byte_size field is not used by CP, but it is currently used by rocgdb. Put the field back; we still need to find a solution for alt_scratch_backing_memory_byte_size. Also, completely disable alternate scratch for now, as changes are needed to support the debugger. --- runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp | 9 +++++++++ runtime/hsa-runtime/core/util/flag.h | 8 +++++--- runtime/hsa-runtime/inc/amd_hsa_queue.h | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 7d64774000..47a6f3876b 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -1993,6 +1993,15 @@ void AqlQueue::InitScratchSRD() { amd_queue_.alt_scratch_backing_memory_location = queue_scratch_.alt_queue_process_offset; const auto& agent_props = agent_->properties(); + const uint32_t num_xcc = agent_props.NumXcc; + + // FIXME: amd_queue_.scratch_backing_memory_byte_size is not used by CP, but it + // is used by the debugger. Putting back the scratch_backing_memory_byte_size + // field. But we need to find a location for alt_scratch_backing_memory_byte_size + + // report size per XCC + amd_queue_.scratch_backing_memory_byte_size = queue_scratch_.main_size / num_xcc; + //amd_queue_.alt_scratch_backing_memory_byte_size = queue_scratch_.alt_size / num_xcc; // For backwards compatibility this field records the per-lane scratch // for a 64 lane wavefront. 
If scratch was allocated for 32 lane waves diff --git a/runtime/hsa-runtime/core/util/flag.h b/runtime/hsa-runtime/core/util/flag.h index 95286b8a9d..6a6d7bd483 100644 --- a/runtime/hsa-runtime/core/util/flag.h +++ b/runtime/hsa-runtime/core/util/flag.h @@ -154,9 +154,11 @@ class Flag { enable_scratch_async_reclaim_ = (var == "0") ? false : true; var = os::GetEnvVar("HSA_ENABLE_SCRATCH_ALT"); - //Temporary: Disable alternate scratch by default as it can cause occasional hangs - //enable_scratch_alt_ = (var == "0") || !enable_scratch_async_reclaim_ ? false : true; - enable_scratch_alt_ = (var == "1") && enable_scratch_async_reclaim_ ? true : false; + // Temporary: Completely disable alternate scratch because we need to update + // the debugger so that it can tell whether a dispatch is using alternate scratch + // instead of main scratch + // enable_scratch_alt_ = (var == "0") || !enable_scratch_async_reclaim_ ? false : true; + enable_scratch_alt_ = false; tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB"); diff --git a/runtime/hsa-runtime/inc/amd_hsa_queue.h b/runtime/hsa-runtime/inc/amd_hsa_queue.h index 440508a663..0f3a759a21 100644 --- a/runtime/hsa-runtime/inc/amd_hsa_queue.h +++ b/runtime/hsa-runtime/inc/amd_hsa_queue.h @@ -133,7 +133,7 @@ typedef struct AMD_QUEUE_ALIGN amd_queue_v2_s { uint32_t compute_tmpring_size; uint32_t scratch_resource_descriptor[4]; uint64_t scratch_backing_memory_location; - uint32_t reserved3[2]; + uint64_t scratch_backing_memory_byte_size; uint32_t scratch_wave64_lane_byte_size; amd_queue_properties32_t queue_properties; volatile uint64_t scratch_max_use_index; /* V2 */