P4 to Git Change 1564298 by gandryey@gera-w8 on 2018/06/05 15:43:17
SWDEV-79445 - OCL generic changes and code clean-up
- Add reallocation logic for the memory dependency array: the SVM path can pass more SVM pointers than the maximum number of kernel arguments, so the array must be able to grow.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#420 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#105 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#54 edit
[ROCm/clr commit: 1130565901]
此提交包含在:
@@ -80,7 +80,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
}
|
||||
|
||||
// Did we reach the limit?
|
||||
if (maxMemObjectsInQueue_ <= (numMemObjectsInQueue_ + 1)) {
|
||||
if (maxMemObjectsInQueue_ <= numMemObjectsInQueue_) {
|
||||
flushL1Cache = true;
|
||||
}
|
||||
|
||||
@@ -109,14 +109,27 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
endMemObjectsInQueue_ = numMemObjectsInQueue_;
|
||||
}
|
||||
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
|
||||
// If the current launch didn't start from the beginning, then move the data
|
||||
if (0 != endMemObjectsInQueue_) {
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
|
||||
}
|
||||
} else if (numMemObjectsInQueue_ >= maxMemObjectsInQueue_) {
|
||||
// note: The array growth shouldn't occur under the normal conditions,
|
||||
// but in a case when SVM path sends the amount of SVM ptrs over
|
||||
// the max size of kernel arguments
|
||||
MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
|
||||
if (nullptr == ptr) {
|
||||
numMemObjectsInQueue_ = 0;
|
||||
return;
|
||||
}
|
||||
maxMemObjectsInQueue_ <<= 1;
|
||||
memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
|
||||
memObjectsInQueue_= ptr;
|
||||
}
|
||||
// Clear all objects except current kernel
|
||||
memset(&memObjectsInQueue_[i], 0, sizeof(amd::Memory*) * numMemObjectsInQueue_);
|
||||
numMemObjectsInQueue_ -= endMemObjectsInQueue_;
|
||||
endMemObjectsInQueue_ = 0;
|
||||
}
|
||||
|
||||
@@ -454,7 +454,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
}
|
||||
|
||||
// Did we reach the limit?
|
||||
if (maxMemObjectsInQueue_ <= (numMemObjectsInQueue_ + 1)) {
|
||||
if (maxMemObjectsInQueue_ <= numMemObjectsInQueue_) {
|
||||
flushL1Cache = true;
|
||||
}
|
||||
|
||||
@@ -485,12 +485,28 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
endMemObjectsInQueue_ = numMemObjectsInQueue_;
|
||||
}
|
||||
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
|
||||
// If the current launch didn't start from the beginning, then move the data
|
||||
if (0 != endMemObjectsInQueue_) {
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
|
||||
}
|
||||
} else if (numMemObjectsInQueue_ >= maxMemObjectsInQueue_) {
|
||||
// note: The array growth shouldn't occur under the normal conditions,
|
||||
// but in a case when SVM path sends the amount of SVM ptrs over
|
||||
// the max size of kernel arguments
|
||||
MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
|
||||
if (nullptr == ptr) {
|
||||
numMemObjectsInQueue_ = 0;
|
||||
return;
|
||||
}
|
||||
maxMemObjectsInQueue_ <<= 1;
|
||||
memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
|
||||
memObjectsInQueue_= ptr;
|
||||
}
|
||||
|
||||
// Adjust the number of active objects
|
||||
numMemObjectsInQueue_ -= endMemObjectsInQueue_;
|
||||
endMemObjectsInQueue_ = 0;
|
||||
|
||||
@@ -128,7 +128,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
}
|
||||
|
||||
// Did we reach the limit?
|
||||
if (maxMemObjectsInQueue_ <= (numMemObjectsInQueue_ + 1)) {
|
||||
if (maxMemObjectsInQueue_ <= numMemObjectsInQueue_) {
|
||||
flushL1Cache = true;
|
||||
}
|
||||
|
||||
@@ -157,14 +157,28 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
endMemObjectsInQueue_ = numMemObjectsInQueue_;
|
||||
}
|
||||
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
|
||||
// If the current launch didn't start from the beginning, then move the data
|
||||
if (0 != endMemObjectsInQueue_) {
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
memObjectsInQueue_[i].readOnly_ = memObjectsInQueue_[j].readOnly_;
|
||||
}
|
||||
} else if (numMemObjectsInQueue_ >= maxMemObjectsInQueue_) {
|
||||
// note: The array growth shouldn't occur under the normal conditions,
|
||||
// but in a case when SVM path sends the amount of SVM ptrs over
|
||||
// the max size of kernel arguments
|
||||
MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
|
||||
if (nullptr == ptr) {
|
||||
numMemObjectsInQueue_ = 0;
|
||||
return;
|
||||
}
|
||||
maxMemObjectsInQueue_ <<= 1;
|
||||
memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
|
||||
memObjectsInQueue_= ptr;
|
||||
}
|
||||
// Clear all objects except current kernel
|
||||
memset(&memObjectsInQueue_[i], 0, sizeof(amd::Memory*) * numMemObjectsInQueue_);
|
||||
|
||||
numMemObjectsInQueue_ -= endMemObjectsInQueue_;
|
||||
endMemObjectsInQueue_ = 0;
|
||||
}
|
||||
|
||||
新增問題並參考
封鎖使用者