From 03910f3663d663cecf2c2f5fe6d67ee40dd175e1 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 5 Jun 2018 15:48:18 -0400
Subject: [PATCH] P4 to Git Change 1564298 by gandryey@gera-w8 on 2018/06/05
15:43:17
SWDEV-79445 - OCL generic changes and code clean-up
- Add reallocation logic for the memory dependency tracker. The SVM path can send more SVM ptrs than the max size of kernel arguments allows for
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#420 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#105 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#54 edit
[ROCm/clr commit: 1130565901258cb44898acd76719898166cdced4]
---
.../rocclr/runtime/device/gpu/gpuvirtual.cpp | 29 +++++++++++++-----
.../rocclr/runtime/device/pal/palvirtual.cpp | 28 +++++++++++++----
.../rocclr/runtime/device/rocm/rocvirtual.cpp | 30 ++++++++++++++-----
3 files changed, 65 insertions(+), 22 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 65b188c4b8..38c67e5906 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -80,7 +80,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
}
// Did we reach the limit?
- if (maxMemObjectsInQueue_ <= (numMemObjectsInQueue_ + 1)) {
+ if (maxMemObjectsInQueue_ <= numMemObjectsInQueue_) {
flushL1Cache = true;
}
@@ -109,14 +109,27 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
endMemObjectsInQueue_ = numMemObjectsInQueue_;
}
- // Preserve all objects from the current kernel
- for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
- memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
- memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
- memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
+ // If the current launch didn't start from the beginning, then move the data
+ if (0 != endMemObjectsInQueue_) {
+ // Preserve all objects from the current kernel
+ for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
+ memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
+ memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
+ memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
+ }
+ } else if (numMemObjectsInQueue_ >= maxMemObjectsInQueue_) {
+ // note: The array growth shouldn't occur under the normal conditions,
+ // but in a case when SVM path sends the amount of SVM ptrs over
+ // the max size of kernel arguments
+ MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
+ if (nullptr == ptr) {
+ numMemObjectsInQueue_ = 0;
+ return;
+ }
+ maxMemObjectsInQueue_ <<= 1;
+ memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
+ memObjectsInQueue_= ptr;
}
- // Clear all objects except current kernel
- memset(&memObjectsInQueue_[i], 0, sizeof(amd::Memory*) * numMemObjectsInQueue_);
numMemObjectsInQueue_ -= endMemObjectsInQueue_;
endMemObjectsInQueue_ = 0;
}
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 80443ef28a..1f0756342d 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -454,7 +454,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
}
// Did we reach the limit?
- if (maxMemObjectsInQueue_ <= (numMemObjectsInQueue_ + 1)) {
+ if (maxMemObjectsInQueue_ <= numMemObjectsInQueue_) {
flushL1Cache = true;
}
@@ -485,12 +485,28 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
endMemObjectsInQueue_ = numMemObjectsInQueue_;
}
- // Preserve all objects from the current kernel
- for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
- memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
- memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
- memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
+ // If the current launch didn't start from the beginning, then move the data
+ if (0 != endMemObjectsInQueue_) {
+ // Preserve all objects from the current kernel
+ for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
+ memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
+ memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
+ memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
+ }
+ } else if (numMemObjectsInQueue_ >= maxMemObjectsInQueue_) {
+ // note: The array growth shouldn't occur under the normal conditions,
+ // but in a case when SVM path sends the amount of SVM ptrs over
+ // the max size of kernel arguments
+ MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
+ if (nullptr == ptr) {
+ numMemObjectsInQueue_ = 0;
+ return;
+ }
+ maxMemObjectsInQueue_ <<= 1;
+ memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
+ memObjectsInQueue_= ptr;
}
+
// Adjust the number of active objects
numMemObjectsInQueue_ -= endMemObjectsInQueue_;
endMemObjectsInQueue_ = 0;
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp
index 924761a897..e147071c78 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp
@@ -128,7 +128,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
}
// Did we reach the limit?
- if (maxMemObjectsInQueue_ <= (numMemObjectsInQueue_ + 1)) {
+ if (maxMemObjectsInQueue_ <= numMemObjectsInQueue_) {
flushL1Cache = true;
}
@@ -157,14 +157,28 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
endMemObjectsInQueue_ = numMemObjectsInQueue_;
}
- // Preserve all objects from the current kernel
- for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
- memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
- memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
- memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
+ // If the current launch didn't start from the beginning, then move the data
+ if (0 != endMemObjectsInQueue_) {
+ // Preserve all objects from the current kernel
+ for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
+ memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
+ memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
+ memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
+ }
+ } else if (numMemObjectsInQueue_ >= maxMemObjectsInQueue_) {
+ // note: The array growth shouldn't occur under the normal conditions,
+ // but in a case when SVM path sends the amount of SVM ptrs over
+ // the max size of kernel arguments
+ MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
+ if (nullptr == ptr) {
+ numMemObjectsInQueue_ = 0;
+ return;
+ }
+ maxMemObjectsInQueue_ <<= 1;
+ memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
+ memObjectsInQueue_= ptr;
}
- // Clear all objects except current kernel
- memset(&memObjectsInQueue_[i], 0, sizeof(amd::Memory*) * numMemObjectsInQueue_);
+
numMemObjectsInQueue_ -= endMemObjectsInQueue_;
endMemObjectsInQueue_ = 0;
}