diff --git a/openclose.cpp b/openclose.cpp
index d903e2aa44..2435c4fc94 100644
--- a/openclose.cpp
+++ b/openclose.cpp
@@ -145,7 +145,14 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() {
     device = get_wddmdev(j+1);
     if (device == nullptr)
       return -1;
-    total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4;
+    /*
+     * For an APU, use non-local memory (shared GPU memory) as GPU memory,
+     * because an APU has only a small amount of local memory.
+     */
+    if (device->IsDgpu())
+      total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4;
+    else
+      total_local_size += wsl::AlignUp(device->NonLocalHeapSize(), align) * 4;
   }
 
   local_heap_space_start_ = 0;
diff --git a/thunk_proxy/libthunk_proxy.a b/thunk_proxy/libthunk_proxy.a
index 4b1ed96afc..afc99652df 100644
Binary files a/thunk_proxy/libthunk_proxy.a and b/thunk_proxy/libthunk_proxy.a differ
diff --git a/topology.cpp b/topology.cpp
index ee23adb908..2db712e341 100644
--- a/topology.cpp
+++ b/topology.cpp
@@ -642,7 +642,12 @@ static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
     return ret;
 
   props.HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
-  props.SizeInBytes = device->LocalHeapSize();
+
+  if (device->IsDgpu())
+    props.SizeInBytes = device->LocalHeapSize();
+  else
+    props.SizeInBytes = device->NonLocalHeapSize();
+
   props.Width = device->MemoryBusWidth();
   props.MemoryClockMax = device->MaxMemoryClockMhz();
 
diff --git a/wddm/device.cpp b/wddm/device.cpp
index 80e3dc4d62..f51af85404 100644
--- a/wddm/device.cpp
+++ b/wddm/device.cpp
@@ -152,37 +152,64 @@ bool WDDMDevice::GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE segment_type,
   return false;
 }
 
+/* Local heap (dedicated GPU memory) includes the visible heap and the invisible heap.
+ * Non-local heap refers to shared GPU memory, which is system memory.
+ */
 uint64_t WDDMDevice::VramAvail(void) {
   D3DKMT_QUERYSTATISTICS stats;
   NTSTATUS ret;
   uint64_t usedVis = 0;
   uint64_t usedInv = 0;
+  uint64_t usedNonLocal = 0;
+  uint32_t segmentId = 0;
 
   // wait fence complete
   uint64_t value = page_fence_value_.load();
   if(!CpuWait(&page_syncobj_, &value, 1, false))
     return HSA_STATUS_ERROR;
 
-  // local cpu-visible memory
-  memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
-  stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
-  stats.AdapterLuid = adapter_luid_;
-  stats.QuerySegment.SegmentId = 0;
-  ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
-  if (ret == 0)
-    usedVis = stats.QueryResult.SegmentInformation.BytesResident;
+  if (IsDgpu()) {
+    // local cpu-visible memory
+    if(!GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE_MEMORY, segmentId))
+      return HSA_STATUS_ERROR;
 
-  // local invisible memory
-  memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
-  stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
-  stats.AdapterLuid = adapter_luid_;
-  stats.QuerySegment.SegmentId = 1;
+    memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
+    stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
+    stats.AdapterLuid = adapter_luid_;
+    stats.QuerySegment.SegmentId = segmentId;
+    ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
+    if (ret == 0)
+      usedVis = stats.QueryResult.SegmentInformation.BytesResident;
 
-  ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
-  if (ret == 0)
-    usedInv = stats.QueryResult.SegmentInformation.BytesResident;
+    // local invisible memory (queried only when the device reports such a heap)
+    if (device_info_.local_invisible_heap_size) {
+      segmentId++;  // assumes the invisible segment id directly follows the visible one
+      memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
+      stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
+      stats.AdapterLuid = adapter_luid_;
+      stats.QuerySegment.SegmentId = segmentId;
 
-  return LocalHeapSize() - usedVis - usedInv;
+      ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
+      if (ret == 0)
+        usedInv = stats.QueryResult.SegmentInformation.BytesResident;
+    }
+
+    return LocalHeapSize() - usedVis - usedInv;
+  } else {
+    // APU - NonLocal memory
+    if(!GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE_SYSMEM, segmentId))
+      return HSA_STATUS_ERROR;
+
+    memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
+    stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
+    stats.AdapterLuid = adapter_luid_;
+    stats.QuerySegment.SegmentId = segmentId;
+    ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats));
+    if (ret == 0)
+      usedNonLocal = stats.QueryResult.SegmentInformation.BytesResident;
+
+    return NonLocalHeapSize() - usedNonLocal;
+  }
 }
 
 bool WDDMDevice::CreateDevice(void) {