From fc3625d738cd86cd6473445b57013206c548af5e Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 12 Jul 2018 14:31:30 -0400
Subject: [PATCH] P4 to Git Change 1579735 by gandryey@gera-w8 on 2018/07/12
14:22:40
SWDEV-158730 - [CQE OCL][ocltst][WIN] OCLMemoryInfo[1], a sub-test of the ocltst oclruntime module, fails
- Update free memory calculation for APU systems. There is still an issue in GSL with multiple alloc/free passes
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#595 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#96 edit
[ROCm/clr commit: 0b08fa4a4ea3f58e6f5d75925e74f06d69f7c982]
---
.../rocclr/runtime/device/gpu/gpudevice.cpp | 21 +++++-----
.../rocclr/runtime/device/pal/paldevice.cpp | 42 ++++++++-----------
2 files changed, 29 insertions(+), 34 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
index 6f515bea2b..6c9821eef9 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -1800,28 +1800,29 @@ bool Device::globalFreeMemory(size_t* freeMemory) const {
return false;
}
- gslMemInfo memInfo = {0};
+ gslMemInfo memInfo = { 0 };
gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
// Fill free memory info
- freeMemory[TotalFreeMemory] =
- (memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes +
- resourceCache().lclCacheSize()) / Ki;
+ freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes +
+ resourceCache().lclCacheSize()) / Ki;
freeMemory[LargestFreeBlock] =
- std::max(memInfo.cardLargestFreeBlockBytes, memInfo.cardExtLargestFreeBlockBytes) / Ki;
+ std::max(memInfo.cardLargestFreeBlockBytes, memInfo.cardExtLargestFreeBlockBytes) / Ki;
if (settings().apuSystem_) {
+ uint64_t sysMem = 0;
+ if ((memInfo.agpMemAvailableBytes + resourceCache().cacheSize()) > resourceCache().lclCacheSize()) {
+ sysMem = (memInfo.agpMemAvailableBytes + resourceCache().cacheSize()) - resourceCache().lclCacheSize();
+ }
+ sysMem /= Ki;
+ freeMemory[TotalFreeMemory] += sysMem;
+
if (settings().viPlus_) {
// for viPlus_, OCL is using remote instead remoteUSWC to avoid extra copy
- freeMemory[TotalFreeMemory] += (memInfo.agpMemAvailableCacheableBytes -
- resourceCache().lclCacheSize() + resourceCache().cacheSize()) / Ki;
freeMemory[LargestFreeBlock] += memInfo.agpCacheableLargestFreeBlockBytes / Ki;
} else {
- freeMemory[TotalFreeMemory] += (memInfo.agpMemAvailableBytes -
- resourceCache().lclCacheSize() + resourceCache().cacheSize()) / Ki;
freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
}
}
-
return true;
}
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index b4b21d7727..e533330c0e 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -1678,35 +1678,29 @@ bool Device::globalFreeMemory(size_t* freeMemory) const {
Pal::gpusize local = allocedMem[Pal::GpuHeapLocal];
Pal::gpusize invisible = allocedMem[Pal::GpuHeapInvisible] - resourceCache().lclCacheSize();
- // Calculate free memory
- if (local >= heaps_[Pal::GpuHeapLocal].heapSize) {
- local = 0;
- } else {
- local = heaps_[Pal::GpuHeapLocal].heapSize - local;
- }
- if (invisible >= info().maxMemAllocSize_) {
- invisible = 0;
- } else {
- invisible = info().maxMemAllocSize_ - invisible;
- }
// Fill free memory info
- freeMemory[TotalFreeMemory] = static_cast((local + invisible) / Ki);
- freeMemory[LargestFreeBlock] = static_cast(std::max(local, invisible) / Ki);
+ freeMemory[TotalFreeMemory] = static_cast((info().globalMemSize_ -
+ (local + invisible)) / Ki);
+ if (invisible >= heaps_[Pal::GpuHeapInvisible].heapSize) {
+ invisible = 0;
+ }
+ else {
+ invisible = heaps_[Pal::GpuHeapInvisible].heapSize - invisible;
+ }
+ freeMemory[LargestFreeBlock] = static_cast(invisible) / Ki;
if (settings().apuSystem_) {
- Pal::GpuHeap heap = settings().viPlus_ ? Pal::GpuHeapGartCacheable: Pal::GpuHeapGartUswc;
- Pal::gpusize sysMem = allocedMem[heap];
- if (sysMem >= heaps_[heap].heapSize) {
- sysMem = 0;
- } else {
- sysMem = heaps_[heap].heapSize - sysMem +
- resourceCache().cacheSize() - resourceCache().lclCacheSize();
- }
+ Pal::gpusize sysMem = allocedMem[Pal::GpuHeapGartCacheable] + allocedMem[Pal::GpuHeapGartUswc] -
+ resourceCache().cacheSize() + resourceCache().lclCacheSize();
sysMem /= Ki;
- freeMemory[TotalFreeMemory] += static_cast(sysMem);
- if (freeMemory[LargestFreeBlock] < sysMem) {
- freeMemory[LargestFreeBlock] = static_cast(sysMem);
+ if (sysMem >= freeMemory[TotalFreeMemory]) {
+ freeMemory[TotalFreeMemory] = 0;
+ } else {
+ freeMemory[TotalFreeMemory] -= sysMem;
+ }
+ if (freeMemory[LargestFreeBlock] < freeMemory[TotalFreeMemory]) {
+ freeMemory[LargestFreeBlock] = freeMemory[TotalFreeMemory];
}
}