P4 to Git Change 1270658 by gandryey@gera-w8 on 2016/05/18 17:53:45

SWDEV-86035 - Add PAL backend to OpenCL
	- Fix a crash in the pipe test. The device layer can't use the device blit queue directly; it must go through a blit manager call, which performs the correct wait-for-idle sequence.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/runtime/OCLRTQueue.cpp#2 edit
Этот коммит содержится в:
foreman
2016-05-18 18:11:40 -04:00
родитель 11c0c4e127
Коммит a94fa4eabb
7 изменённых файлов: 39 добавлений и 38 удалений
+13 -1
Просмотреть файл
@@ -23,8 +23,8 @@ inline void
DmaBlitManager::synchronize() const
{
if (syncOperation_) {
gpu().waitAllEngines();
gpu().releaseMemObjects();
gpu().waitAllEngines();
}
}
@@ -2685,6 +2685,18 @@ KernelBlitManager::runScheduler(
return result;
}
//! Writes raw CPU data into the given memory object and then calls
//! synchronize().
//!
//! \param memory  Destination memory object; it is downcast to pal::Memory.
//! \param size    Size of the raw data (callers pass a byte count).
//! \param data    Pointer to the raw CPU data.
void
KernelBlitManager::writeRawData(
    device::Memory& memory,
    size_t          size,
    const void*     data
    ) const
{
    // The write itself is implemented by the PAL memory object.
    pal::Memory& palMemory = static_cast<pal::Memory&>(memory);

    // NOTE(review): the meaning of the trailing 'false' flag is defined by
    // pal::Memory::writeRawData - confirm against its declaration.
    palMemory.writeRawData(gpu(), size, data, false);

    synchronize();
}
amd::Memory*
DmaBlitManager::pinHostMemory(
const void* hostMem,
+9 -2
Просмотреть файл
@@ -371,14 +371,21 @@ public:
bool entire = false //!< Entire buffer will be updated
) const;
//! Fills an image memory with a pattern data
virtual bool runScheduler(
//! Runs a GPU scheduler for device enqueue
bool runScheduler(
device::Memory& vqueue, //!< Memory object for virtual queue
device::Memory& params, //!< Extra arguments for the scheduler
uint paramIdx, //!< Parameter index
uint threads //!< Number of scheduling threads
) const;
//! Writes CPU raw data into GPU memory
void writeRawData(
device::Memory& memory, //!< Memory object for data update
size_t size, //!< Size of raw data in bytes
const void* data //!< Raw data pointer
) const;
private:
static const size_t MaxXferBuffers = 2;
+4 -3
Просмотреть файл
@@ -987,8 +987,8 @@ Device::init()
acl_error error;
compiler_ = aclCompilerInit(&opts, &error);
if (error != ACL_SUCCESS) {
LogError("Error initializing the compiler");
return false;
LogError("Error initializing the compiler");
return false;
}
size_t size = Pal::GetPlatformSize();
@@ -1210,7 +1210,8 @@ Device::createBuffer(
// Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
// Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
static_cast<const KernelBlitManager&>(xferMgr()).writeRawData(
*gpuMemory, sizeof(pipeInit), pipeInit);
}
// If memory has direct access from host, then get CPU address
if (gpuMemory->isHostMemDirectAccess() &&
+2 -3
Просмотреть файл
@@ -677,9 +677,8 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
return false;
}
//! @todo get the right value;
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = 64;//dev().getAttribs().wavefrontSize;
workGroupInfo_.wavefrontSize_ = dev().properties().gfxipProperties.shaderCore.wavefrontSize;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ =
@@ -999,7 +998,7 @@ HSAILKernel::loadArguments(
break;
}
//! @todo 64 bit isn't supported with 32 bit binary
//! 64 bit isn't supported with 32 bit binary
uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
WriteAqlArg(&aqlArgBuf, &globalAddress, sizeof(void*));
+2 -12
Просмотреть файл
@@ -151,14 +151,8 @@ Memory::create(
reinterpret_cast<Resource::ViewParams*>(params);
// Check if parent was allocated in system memory
if ((view->resource_->memoryType() == Resource::Pinned) ||
(((view->resource_->memoryType() == Resource::Remote) ||
(view->resource_->memoryType() == Resource::RemoteUSWC)) &&
// @todo Enable unconditional optimization for remote memory
// Check for external allocation, to avoid the optimization
// for non-VM (double copy) mode
(owner() != nullptr) &&
((owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) ||
dev().settings().remoteAlloc_))) {
(view->resource_->memoryType() == Resource::Remote) ||
(view->resource_->memoryType() == Resource::RemoteUSWC)) {
// Marks memory object for direct GPU access to the host memory
flags_ |= HostMemoryDirectAccess;
}
@@ -578,10 +572,6 @@ Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
}
}
//!@todo A wait isn't really necessary. However
//! Linux no-VM may have extra random failures.
wait(gpu);
// Should never fail
assert(result && "Memory synchronization failed!");
}
-2
Просмотреть файл
@@ -237,7 +237,6 @@ HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStage
aclType from = ACL_TYPE_DEFAULT;
needOptionsCheck = true;
size_t boolSize = sizeof(bool);
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
// Checking llvmir in .llvmir section
bool containsSpirv = true;
errorCode = aclQueryInfo(dev().compiler(), binaryElf_,
@@ -375,7 +374,6 @@ HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options) {
return continueCompileFrom;
}
bool recompile = false;
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
switch (continueCompileFrom) {
case ACL_TYPE_HSAIL_BINARY:
case ACL_TYPE_CG:
+9 -15
Просмотреть файл
@@ -149,8 +149,9 @@ VirtualGPU::Queue::addCmdMemRef(Pal::IGpuMemory* iMem)
void
VirtualGPU::Queue::removeCmdMemRef(Pal::IGpuMemory* iMem)
{
memReferences_.erase(iMem);
iDev_->RemoveGpuMemoryReferences(1, &iMem, iQueue_);
if (0 != memReferences_.erase(iMem)) {
iDev_->RemoveGpuMemoryReferences(1, &iMem, iQueue_);
}
}
uint
@@ -2264,39 +2265,32 @@ VirtualGPU::submitMarker(amd::Marker& vcmd)
GpuEvent*
VirtualGPU::getGpuEvent(Pal::IGpuMemory* iMem)
{
GpuEvents::iterator it = gpuEvents_.find(iMem);
if (it == gpuEvents_.end()) {
// queue(MainEngine).addMemRef(iMem);
// queue(SdmaEngine).addMemRef(iMem);
}
return &gpuEvents_[iMem];
}
void
VirtualGPU::assignGpuEvent(Pal::IGpuMemory* iMem, GpuEvent gpuEvent)
{
GpuEvents::iterator it = gpuEvents_.find(iMem);
auto it = gpuEvents_.find(iMem);
if (it != gpuEvents_.end()) {
it->second = gpuEvent;
}
else {
// queue(gpuEvent.engineId_).addMemRef(iMem);
gpuEvents_[iMem] = gpuEvent;
}
// queues_[gpuEvent.engineId_]->addCmdMemRef(iMem);
}
void
VirtualGPU::releaseMemory(Pal::IGpuMemory* iMem, bool wait)
{
auto it = gpuEvents_.find(iMem);
//! @note if there is no wait, then it's a view release
if (wait) {
waitForEvent(&gpuEvents_[iMem]);
//queue(MainEngine).removeMemRef(iMem);
//queue(SdmaEngine).removeMemRef(iMem);
if (wait && (it != gpuEvents_.end())) {
waitForEvent(&it->second);
queues_[MainEngine]->removeCmdMemRef(iMem);
queues_[SdmaEngine]->removeCmdMemRef(iMem);
gpuEvents_.erase(iMem);
gpuEvents_.erase(it);
}
}