librocdxg: use shared GPU memory as vram on small APU

Signed-off-by: Longlong Yao <Longlong.Yao@amd.com>
Signed-off-by: Flora Cui <flora.cui@amd.com>
This commit is contained in:
Longlong Yao
2025-10-27 11:20:26 +08:00
committed by Flora Cui
vanhempi 5ebe95d5b2
commit e616b3e65e
4 muutettua tiedostoa jossa 58 lisäystä ja 19 poistoa
+8 -1
Näytä tiedosto
@@ -145,7 +145,14 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() {
device = get_wddmdev(j+1);
if (device == nullptr)
return -1;
total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4;
/*
* For an APU, use non-local memory (shared GPU memory) as GPU memory,
* because its local memory is small.
*/
if (device->IsDgpu())
total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4;
else
total_local_size += wsl::AlignUp(device->NonLocalHeapSize(), align) * 4;
}
local_heap_space_start_ = 0;
Binary file not shown.
+6 -1
Näytä tiedosto
@@ -642,7 +642,12 @@ static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
return ret;
props.HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
props.SizeInBytes = device->LocalHeapSize();
if (device->IsDgpu())
props.SizeInBytes = device->LocalHeapSize();
else
props.SizeInBytes = device->NonLocalHeapSize();
props.Width = device->MemoryBusWidth();
props.MemoryClockMax = device->MaxMemoryClockMhz();
+44 -17
Näytä tiedosto
@@ -152,37 +152,64 @@ bool WDDMDevice::GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE segment_type,
return false;
}
/* Local heap (dedicated GPU memory) includes the visible heap and the invisible heap.
 * Non-local heap refers to shared GPU memory, which is system memory.
 */
/*
 * Reports the number of bytes still available in the heap that is treated as
 * VRAM: the heap size minus the BytesResident value(s) reported by
 * D3DKMTQueryStatistics segment queries.
 *   - IsDgpu() true : LocalHeapSize() - usedVis - usedInv
 *   - otherwise     : NonLocalHeapSize() - usedNonLocal
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers, so
 * removed and added lines appear interleaved; read it against the real file
 * before relying on statement order.
 *
 * NOTE(review): the failure paths return HSA_STATUS_ERROR through the uint64_t
 * byte-count result, so a caller sees it as a size; confirm callers can tell
 * the two apart.
 */
uint64_t WDDMDevice::VramAvail(void) {
D3DKMT_QUERYSTATISTICS stats;
NTSTATUS ret;
// Resident-byte counters; each keeps its 0 initial value when the matching
// query does not return 0.
uint64_t usedVis = 0;
uint64_t usedInv = 0;
uint64_t usedNonLocal = 0;
uint32_t segmentId = 0;
// wait fence complete
uint64_t value = page_fence_value_.load();
if(!CpuWait(&page_syncobj_, &value, 1, false))
return HSA_STATUS_ERROR;
// local cpu-visible memory
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
stats.AdapterLuid = adapter_luid_;
stats.QuerySegment.SegmentId = 0;
ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
if (ret == 0)
usedVis = stats.QueryResult.SegmentInformation.BytesResident;
if (IsDgpu()) {
// local cpu-visible memory
// The segment id is looked up by segment type instead of being hard-coded.
if(!GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE_MEMORY, segmentId))
return HSA_STATUS_ERROR;
// local invisible memory
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
stats.AdapterLuid = adapter_luid_;
stats.QuerySegment.SegmentId = 1;
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
stats.AdapterLuid = adapter_luid_;
stats.QuerySegment.SegmentId = segmentId;
ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
if (ret == 0)
usedVis = stats.QueryResult.SegmentInformation.BytesResident;
ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
if (ret == 0)
usedInv = stats.QueryResult.SegmentInformation.BytesResident;
// local invisible memory
// Only queried when the device reports a non-zero invisible heap size.
if (device_info_.local_invisible_heap_size) {
segmentId++;
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
stats.AdapterLuid = adapter_luid_;
// NOTE(review): segmentId was just incremented, yet the literal 1 is used
// here. It is not clear from this view whether this line is on the removed
// or the added side of the diff; confirm the query uses the intended id.
stats.QuerySegment.SegmentId = 1;
return LocalHeapSize() - usedVis - usedInv;
ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
if (ret == 0)
usedInv = stats.QueryResult.SegmentInformation.BytesResident;
}
// NOTE(review): unsigned subtraction; presumably the resident bytes never
// exceed the heap size, otherwise the result would wrap. TODO confirm.
return LocalHeapSize() - usedVis - usedInv;
} else {
// APU - NonLocal memory
if(!GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE_SYSMEM, segmentId))
return HSA_STATUS_ERROR;
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
stats.AdapterLuid = adapter_luid_;
stats.QuerySegment.SegmentId = segmentId;
ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
// If the query does not return 0, usedNonLocal stays 0 and the full
// NonLocalHeapSize() is reported as available.
if (ret == 0)
usedNonLocal = stats.QueryResult.SegmentInformation.BytesResident;
return NonLocalHeapSize() - usedNonLocal;
}
}
bool WDDMDevice::CreateDevice(void) {