From 3f79785a96950a80ae8e5432e5eebd89e8d22696 Mon Sep 17 00:00:00 2001
From: foreman <dl.constructicon@amd.com>
Date: Tue, 8 May 2018 15:47:58 -0400
Subject: [PATCH] P4 to Git Change 1552021 by gandryey@gera-w8 on 2018/05/08
 15:00:13

	SWDEV-151981 - Removal of CPU support on Windows
	- Part 5. Remove runtime logic that considered CPU device support.

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#130 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#92 edit
---
 rocclr/runtime/platform/command.cpp | 255 +++++++++++++---------------
 rocclr/runtime/platform/context.cpp |  59 +++----
 rocclr/runtime/platform/memory.cpp  |  15 +-
 rocclr/runtime/platform/program.cpp |   2 +-
 4 files changed, 140 insertions(+), 191 deletions(-)

diff --git a/rocclr/runtime/platform/command.cpp b/rocclr/runtime/platform/command.cpp
index 6e53636526..eb6dd0580b 100644
--- a/rocclr/runtime/platform/command.cpp
+++ b/rocclr/runtime/platform/command.cpp
@@ -279,28 +279,24 @@ cl_int NativeFnCommand::invoke() {
 }
 
 bool OneMemoryArgCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    device::Memory* mem = memory_->getDeviceMemory(queue()->device());
-    if (NULL == mem) {
-      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
-      return false;
-    }
+  device::Memory* mem = memory_->getDeviceMemory(queue()->device());
+  if (NULL == mem) {
+    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
+    return false;
   }
   return true;
 }
 
 bool TwoMemoryArgsCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    device::Memory* mem = memory1_->getDeviceMemory(queue()->device());
-    if (NULL == mem) {
-      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
-      return false;
-    }
-    mem = memory2_->getDeviceMemory(queue()->device());
-    if (NULL == mem) {
-      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
-      return false;
-    }
+  device::Memory* mem = memory1_->getDeviceMemory(queue()->device());
+  if (NULL == mem) {
+    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
+    return false;
+  }
+  mem = memory2_->getDeviceMemory(queue()->device());
+  if (NULL == mem) {
+    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
+    return false;
   }
   return true;
 }
@@ -356,74 +352,68 @@ bool MapMemoryCommand::isEntireMemory() const {
 }
 
 void UnmapMemoryCommand::releaseResources() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    //! @todo This is a workaround to a deadlock on indirect map release.
-    //! Remove this code when CAL will have a refcounter on memory.
-    //! decIndMapCount() has to go back to submitUnmapMemory()
-    device::Memory* mem = memory_->getDeviceMemory(queue()->device());
-    if (NULL != mem) {
-      mem->releaseIndirectMap();
-    }
+  //! @todo This is a workaround to a deadlock on indirect map release.
+  //! Remove this code once CAL has a refcounter on memory.
+  //! decIndMapCount() has to go back to submitUnmapMemory()
+  device::Memory* mem = memory_->getDeviceMemory(queue()->device());
+  if (NULL != mem) {
+    mem->releaseIndirectMap();
   }
+
   OneMemoryArgCommand::releaseResources();
 }
 
 bool MigrateMemObjectsCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    for (const auto& it : memObjects_) {
-      device::Memory* mem = it->getDeviceMemory(queue()->device());
-      if (NULL == mem) {
-        LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
-        return false;
-      }
+  for (const auto& it : memObjects_) {
+    device::Memory* mem = it->getDeviceMemory(queue()->device());
+    if (NULL == mem) {
+      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
+      return false;
     }
   }
-
   return true;
 }
 
 cl_int NDRangeKernelCommand::validateMemory() {
   const amd::Device& device = queue()->device();
-  if (device.info().type_ & CL_DEVICE_TYPE_GPU) {
-    // Validate the kernel before submission
-    if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
-      return CL_OUT_OF_RESOURCES;
-    }
+  // Validate the kernel before submission
+  if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
+    return CL_OUT_OF_RESOURCES;
+  }
 
-    const amd::KernelSignature& signature = kernel().signature();
-    for (uint i = 0; i != signature.numParameters(); ++i) {
-      const amd::KernelParameterDescriptor& desc = signature.at(i);
-      // Check if it's a memory object
-      if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
-        amd::Memory* amdMemory;
-        if (kernel().parameters().boundToSvmPointer(device, parameters_, i)) {
-          // find the real mem object from svm ptr from the list
-          amdMemory = amd::SvmManager::FindSvmBuffer(
-              *reinterpret_cast<void* const*>(parameters() + desc.offset_));
-        } else {
-          amdMemory = *reinterpret_cast<amd::Memory* const*>(parameters() + desc.offset_);
+  const amd::KernelSignature& signature = kernel().signature();
+  for (uint i = 0; i != signature.numParameters(); ++i) {
+    const amd::KernelParameterDescriptor& desc = signature.at(i);
+    // Check if it's a memory object
+    if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
+      amd::Memory* amdMemory;
+      if (kernel().parameters().boundToSvmPointer(device, parameters_, i)) {
+        // find the real mem object from svm ptr from the list
+        amdMemory = amd::SvmManager::FindSvmBuffer(
+            *reinterpret_cast<void* const*>(parameters() + desc.offset_));
+      } else {
+        amdMemory = *reinterpret_cast<amd::Memory* const*>(parameters() + desc.offset_);
+      }
+      if (amdMemory != NULL) {
+        if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
+          // Make sure argument size isn't bigger than the device limit
+          if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
+            LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
+            return CL_OUT_OF_RESOURCES;
+          }
         }
-        if (amdMemory != NULL) {
-          if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
-            // Make sure argument size isn't bigger than the device limit
-            if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
-              LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
-              return CL_OUT_OF_RESOURCES;
-            }
-          }
-          device::Memory* mem = amdMemory->getDeviceMemory(device);
-          if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
-            if (device.reallocMemory(*amdMemory)) {
-              mem = amdMemory->getDeviceMemory(device);
-            } else {
-              mem = NULL;
-            }
-          }
-          if (NULL == mem) {
-            LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
-            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        device::Memory* mem = amdMemory->getDeviceMemory(device);
+        if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
+          if (device.reallocMemory(*amdMemory)) {
+            mem = amdMemory->getDeviceMemory(device);
+          } else {
+            mem = NULL;
           }
         }
+        if (NULL == mem) {
+          LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
+          return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        }
       }
     }
   }
@@ -432,15 +422,13 @@ cl_int NDRangeKernelCommand::validateMemory() {
 
 bool ExtObjectsCommand::validateMemory() {
   bool retVal = true;
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    for (const auto& it : memObjects_) {
-      device::Memory* mem = it->getDeviceMemory(queue()->device());
-      if (NULL == mem) {
-        LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
-        return false;
-      }
-      retVal = processGLResource(mem);
+  for (const auto& it : memObjects_) {
+    device::Memory* mem = it->getDeviceMemory(queue()->device());
+    if (NULL == mem) {
+      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
+      return false;
     }
+    retVal = processGLResource(mem);
   }
   return retVal;
 }
@@ -454,33 +442,28 @@ bool ReleaseExtObjectsCommand::processGLResource(device::Memory* mem) {
 }
 
 bool MakeBuffersResidentCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    for (const auto& it : memObjects_) {
-      device::Memory* mem = it->getDeviceMemory(queue()->device());
-      if (NULL == mem) {
-        LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
-        return false;
-      }
+  for (const auto& it : memObjects_) {
+    device::Memory* mem = it->getDeviceMemory(queue()->device());
+    if (NULL == mem) {
+      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
+      return false;
     }
   }
-
   return true;
 }
+
 bool ThreadTraceMemObjectsCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    for (auto it = memObjects_.cbegin(); it != memObjects_.cend(); it++) {
-      device::Memory* mem = (*it)->getDeviceMemory(queue()->device());
-      if (NULL == mem) {
-        for (auto tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) {
-          device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device());
-          delete tmpMem;
-        }
-        LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize());
-        return false;
+  for (auto it = memObjects_.cbegin(); it != memObjects_.cend(); it++) {
+    device::Memory* mem = (*it)->getDeviceMemory(queue()->device());
+    if (NULL == mem) {
+      for (auto tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) {
+        device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device());
+        delete tmpMem;
       }
+      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize());
+      return false;
     }
   }
-
   return true;
 }
 
@@ -527,56 +510,52 @@ void TransferBufferFileCommand::submit(device::VirtualDevice& device) {
 }
 
 bool TransferBufferFileCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    // Check if the destination buffer has direct host access
-    if (!(memory_->getMemFlags() &
-          (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD))) {
-      // Allocate staging buffers
-      for (uint i = 0; i < NumStagingBuffers; ++i) {
-        staging_[i] = new (memory_->getContext())
-            Buffer(memory_->getContext(), StagingBufferMemType, StagingBufferSize);
-        if (NULL == staging_[i] || !staging_[i]->create(nullptr)) {
-          return false;
-        }
-        device::Memory* mem = staging_[i]->getDeviceMemory(queue()->device());
-        if (NULL == mem) {
-          LogPrintfError("Can't allocate staging buffer - 0x%08X bytes!", staging_[i]->getSize());
-          return false;
-        }
+  // Check if the destination buffer has direct host access
+  if (!(memory_->getMemFlags() &
+        (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD))) {
+    // Allocate staging buffers
+    for (uint i = 0; i < NumStagingBuffers; ++i) {
+      staging_[i] = new (memory_->getContext())
+          Buffer(memory_->getContext(), StagingBufferMemType, StagingBufferSize);
+      if (NULL == staging_[i] || !staging_[i]->create(nullptr)) {
+        return false;
+      }
+      device::Memory* mem = staging_[i]->getDeviceMemory(queue()->device());
+      if (NULL == mem) {
+        LogPrintfError("Can't allocate staging buffer - 0x%08X bytes!", staging_[i]->getSize());
+        return false;
       }
     }
+  }
 
-    device::Memory* mem = memory_->getDeviceMemory(queue()->device());
-    if (NULL == mem) {
-      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
-      return false;
-    }
+  device::Memory* mem = memory_->getDeviceMemory(queue()->device());
+  if (NULL == mem) {
+    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
+    return false;
   }
   return true;
 }
 
 bool CopyMemoryP2PCommand::validateMemory() {
-  if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
-    const std::vector<Device*>& devices = memory1_->getContext().devices();
-    if (devices.size() != 1) {
-      LogError("Can't allocate memory object for P2P extension");
-      return false;
-    }
-    device::Memory* mem = memory1_->getDeviceMemory(*devices[0]);
-    if (nullptr == mem) {
-      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
-      return false;
-    }
-    const std::vector<Device*>& devices2 = memory2_->getContext().devices();
-    if (devices2.size() != 1) {
-      LogError("Can't allocate memory object for P2P extension");
-      return false;
-    }
-    mem = memory2_->getDeviceMemory(*devices2[0]);
-    if (nullptr == mem) {
-      LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
-      return false;
-    }
+  const std::vector<Device*>& devices = memory1_->getContext().devices();
+  if (devices.size() != 1) {
+    LogError("Can't allocate memory object for P2P extension");
+    return false;
+  }
+  device::Memory* mem = memory1_->getDeviceMemory(*devices[0]);
+  if (nullptr == mem) {
+    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
+    return false;
+  }
+  const std::vector<Device*>& devices2 = memory2_->getContext().devices();
+  if (devices2.size() != 1) {
+    LogError("Can't allocate memory object for P2P extension");
+    return false;
+  }
+  mem = memory2_->getDeviceMemory(*devices2[0]);
+  if (nullptr == mem) {
+    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
+    return false;
   }
   return true;
 }
diff --git a/rocclr/runtime/platform/context.cpp b/rocclr/runtime/platform/context.cpp
index e05e866c18..17b106e95b 100644
--- a/rocclr/runtime/platform/context.cpp
+++ b/rocclr/runtime/platform/context.cpp
@@ -40,16 +40,10 @@ Context::Context(const std::vector<Device*>& devices, const Info& info)
     }
   }
   if (svmAllocDevice_.size() > 1) {
-    // make sure the CPU is the last device to do allocation.
-    if ((svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU)) {
-      std::swap(svmAllocDevice_.front(), svmAllocDevice_.back());
-    }
-
     uint isFirstDeviceFGSEnabled = svmAllocDevice_.front()->isFineGrainedSystem(true);
     for (auto& dev : svmAllocDevice_) {
       // allocation on fine - grained system incapable device first
-      if (isFirstDeviceFGSEnabled && (dev->type() == CL_DEVICE_TYPE_GPU) &&
-          (!(dev->isFineGrainedSystem(true)))) {
+      if (isFirstDeviceFGSEnabled && !dev->isFineGrainedSystem(true)) {
         std::swap(svmAllocDevice_.front(), dev);
         break;
       }
@@ -288,42 +282,29 @@ void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags) {
     return NULL;
   }
 
-  if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
-    return AlignedMemory::allocate(size, alignment);
-  } else {
-    void* svmPtrAlloced = NULL;
-    void* tempPtr = NULL;
-
-    amd::ScopedLock lock(&ctxLock_);
-    for (const auto& dev : svmAllocDevice_) {
-      if (dev->type() == CL_DEVICE_TYPE_GPU) {
-        // check if the device support svm platform atomics,
-        // skipped allocation for platform atomics if not supported by this device
-        if ((flags & CL_MEM_SVM_ATOMICS) &&
-            !(dev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) {
-          continue;
-        }
-        svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced);
-        if (svmPtrAlloced == NULL) {
-          return NULL;
-        }
-      }
-    }
-    return svmPtrAlloced;
-  }
-}
-
-void Context::svmFree(void* ptr) const {
-  if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
-    AlignedMemory::deallocate(ptr);
-    return;
-  }
+  void* svmPtrAlloced = NULL;
+  void* tempPtr = NULL;
 
   amd::ScopedLock lock(&ctxLock_);
   for (const auto& dev : svmAllocDevice_) {
-    if (dev->type() == CL_DEVICE_TYPE_GPU) {
-      dev->svmFree(ptr);
+    // Check whether the device supports SVM platform atomics;
+    // skip the allocation on this device if atomics were requested but are not supported.
+    if ((flags & CL_MEM_SVM_ATOMICS) &&
+        !(dev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) {
+      continue;
     }
+    svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced);
+    if (svmPtrAlloced == NULL) {
+      return NULL;
+    }
+  }
+  return svmPtrAlloced;
+}
+
+void Context::svmFree(void* ptr) const {
+  amd::ScopedLock lock(&ctxLock_);
+  for (const auto& dev : svmAllocDevice_) {
+    dev->svmFree(ptr);
   }
   return;
 }
diff --git a/rocclr/runtime/platform/memory.cpp b/rocclr/runtime/platform/memory.cpp
index 90a2432212..c3a5b119b4 100644
--- a/rocclr/runtime/platform/memory.cpp
+++ b/rocclr/runtime/platform/memory.cpp
@@ -156,14 +156,6 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
 
   const std::vector<Device*>& devices = context_().devices();
 
-  // Find if a non GPU device was created with the context
-  for (size_t i = 0; i < devices.size(); i++) {
-    if (!(devices[i]->info().type_ & CL_DEVICE_TYPE_GPU)) {
-      allocHostMem = true;
-      break;
-    }
-  }
-
   // This allocation is necessary to use coherency mechanism
   // for the initialization
   if (getMemFlags() & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) {
@@ -243,11 +235,8 @@ bool Memory::create(void* initFrom, bool sysMemAlloc) {
   for (size_t i = 0; i < devices.size(); i++) {
     deviceAlloced_[devices[i]] = AllocInit;
 
-    // Only GPU devices have device memory objects
-    if (devices[i]->info().type_ & CL_DEVICE_TYPE_GPU) {
-      deviceMemories_[i].ref_ = devices[i];
-      deviceMemories_[i].value_ = NULL;
-    }
+    deviceMemories_[i].ref_ = devices[i];
+    deviceMemories_[i].value_ = NULL;
 
     if (DISABLE_DEFERRED_ALLOC) {
       device::Memory* mem = getDeviceMemory(*devices[i]);
diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp
index ffcd627b3e..6338f444ad 100644
--- a/rocclr/runtime/platform/program.cpp
+++ b/rocclr/runtime/platform/program.cpp
@@ -605,7 +605,7 @@ bool Program::ParseAllOptions(const std::string& options, option::Options& parse
 
 bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func, bool noAlias) {
   // FIXME_lmoriche: check that the signatures are compatible
-  if (deviceKernels_.size() == 0 || device.type() == CL_DEVICE_TYPE_CPU) {
+  if (deviceKernels_.size() == 0) {
     signature_ = func->signature();
   }