From 2caff650a91222cc09fc3c89c00aada3d65a86e3 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 6 Jul 2018 17:53:44 -0400
Subject: [PATCH] P4 to Git Change 1577357 by gandryey@gera-w8 on 2018/07/06
17:44:59
SWDEV-158017 - CL_DEVICE_GLOBAL_FREE_MEMORY_AMD doesn't work correctly on PAL backend
- Adjust system memory calculation for APU systems
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#594 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#89 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#95 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#26 edit
---
rocclr/runtime/device/gpu/gpudevice.cpp | 6 ++++--
rocclr/runtime/device/gpu/gpuresource.hpp | 3 +++
rocclr/runtime/device/pal/paldevice.cpp | 18 ++++++++++--------
rocclr/runtime/device/pal/palresource.hpp | 3 +++
4 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp
index 0ac9a5ae68..6f515bea2b 100644
--- a/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -1812,10 +1812,12 @@ bool Device::globalFreeMemory(size_t* freeMemory) const {
if (settings().apuSystem_) {
if (settings().viPlus_) {
// for viPlus_, OCL is using remote instead remoteUSWC to avoid extra copy
- freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableCacheableBytes / Ki;
+ freeMemory[TotalFreeMemory] += (memInfo.agpMemAvailableCacheableBytes -
+ resourceCache().lclCacheSize() + resourceCache().cacheSize()) / Ki;
freeMemory[LargestFreeBlock] += memInfo.agpCacheableLargestFreeBlockBytes / Ki;
} else {
- freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
+ freeMemory[TotalFreeMemory] += (memInfo.agpMemAvailableBytes -
+ resourceCache().lclCacheSize() + resourceCache().cacheSize()) / Ki;
freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
}
}
diff --git a/rocclr/runtime/device/gpu/gpuresource.hpp b/rocclr/runtime/device/gpu/gpuresource.hpp
index 350c930de3..83c23abb2f 100644
--- a/rocclr/runtime/device/gpu/gpuresource.hpp
+++ b/rocclr/runtime/device/gpu/gpuresource.hpp
@@ -449,6 +449,9 @@ class ResourceCache : public amd::HeapObject {
//! Destroys cache
bool free(size_t minCacheEntries = 0);
+ //! Returns the size of all memory, stored in the cache
+ size_t cacheSize() const { return cacheSize_; }
+
//! Returns the size of local memory, stored in the cache
size_t lclCacheSize() const { return lclCacheSize_; }
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index 9c31edc6a5..b4b21d7727 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -1695,16 +1695,18 @@ bool Device::globalFreeMemory(size_t* freeMemory) const {
freeMemory[LargestFreeBlock] = static_cast<size_t>(std::max(local, invisible) / Ki);
if (settings().apuSystem_) {
- Pal::gpusize uswc = allocedMem[Pal::GpuHeapGartUswc];
- if (uswc >= heaps_[Pal::GpuHeapGartUswc].heapSize) {
- uswc = 0;
+ Pal::GpuHeap heap = settings().viPlus_ ? Pal::GpuHeapGartCacheable: Pal::GpuHeapGartUswc;
+ Pal::gpusize sysMem = allocedMem[heap];
+ if (sysMem >= heaps_[heap].heapSize) {
+ sysMem = 0;
} else {
- uswc = heaps_[Pal::GpuHeapGartUswc].heapSize - uswc;
+ sysMem = heaps_[heap].heapSize - sysMem +
+ resourceCache().cacheSize() - resourceCache().lclCacheSize();
}
- uswc /= Ki;
- freeMemory[TotalFreeMemory] += static_cast<size_t>(uswc);
- if (freeMemory[LargestFreeBlock] < uswc) {
- freeMemory[LargestFreeBlock] = static_cast<size_t>(uswc);
+ sysMem /= Ki;
+ freeMemory[TotalFreeMemory] += static_cast<size_t>(sysMem);
+ if (freeMemory[LargestFreeBlock] < sysMem) {
+ freeMemory[LargestFreeBlock] = static_cast<size_t>(sysMem);
}
}
diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp
index 499993eb65..9b4c63f24a 100644
--- a/rocclr/runtime/device/pal/palresource.hpp
+++ b/rocclr/runtime/device/pal/palresource.hpp
@@ -558,6 +558,9 @@ class ResourceCache : public amd::HeapObject {
//! Returns true if cache was freed and false if cache is already empty.
bool free(size_t minCacheEntries = 0);
+ //! Returns the size of all memory, stored in the cache
+ size_t cacheSize() const { return cacheSize_; }
+
//! Returns the size of local memory, stored in the cache
size_t lclCacheSize() const { return lclCacheSize_; }