P4 to Git Change 1270658 by gandryey@gera-w8 on 2016/05/18 17:53:45
SWDEV-86035 - Add PAL backend to OpenCL - Fix a crash in the pipe test. The device layer can't use the device blit queue directly; it requires a blit manager call, which performs the correct wait-for-idle sequence. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/runtime/OCLRTQueue.cpp#2 edit
Этот коммит содержится в:
@@ -23,8 +23,8 @@ inline void
|
||||
DmaBlitManager::synchronize() const
|
||||
{
|
||||
if (syncOperation_) {
|
||||
gpu().waitAllEngines();
|
||||
gpu().releaseMemObjects();
|
||||
gpu().waitAllEngines();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2685,6 +2685,18 @@ KernelBlitManager::runScheduler(
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
KernelBlitManager::writeRawData(
|
||||
device::Memory& memory,
|
||||
size_t size,
|
||||
const void* data
|
||||
) const
|
||||
{
|
||||
static_cast<pal::Memory&>(memory).writeRawData(gpu(), size, data, false);
|
||||
|
||||
synchronize();
|
||||
}
|
||||
|
||||
amd::Memory*
|
||||
DmaBlitManager::pinHostMemory(
|
||||
const void* hostMem,
|
||||
|
||||
@@ -371,14 +371,21 @@ public:
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills an image memory with a pattern data
|
||||
virtual bool runScheduler(
|
||||
//! Runs a GPU scheduler for device enqueue
|
||||
bool runScheduler(
|
||||
device::Memory& vqueue, //!< Memory object for virtual queue
|
||||
device::Memory& params, //!< Extra arguments for the scheduler
|
||||
uint paramIdx, //!< Parameter index
|
||||
uint threads //!< Number of scheduling threads
|
||||
) const;
|
||||
|
||||
//! Writes CPU raw data into GPU memory
|
||||
void writeRawData(
|
||||
device::Memory& memory, //!< Memory object for data update
|
||||
size_t size, //!< Size of raw data
|
||||
const void* data //!< Raw data pointer
|
||||
) const;
|
||||
|
||||
private:
|
||||
static const size_t MaxXferBuffers = 2;
|
||||
|
||||
|
||||
@@ -987,8 +987,8 @@ Device::init()
|
||||
acl_error error;
|
||||
compiler_ = aclCompilerInit(&opts, &error);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Error initializing the compiler");
|
||||
return false;
|
||||
LogError("Error initializing the compiler");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = Pal::GetPlatformSize();
|
||||
@@ -1210,7 +1210,8 @@ Device::createBuffer(
|
||||
// Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
|
||||
// Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
|
||||
size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
|
||||
gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
|
||||
static_cast<const KernelBlitManager&>(xferMgr()).writeRawData(
|
||||
*gpuMemory, sizeof(pipeInit), pipeInit);
|
||||
}
|
||||
// If memory has direct access from host, then get CPU address
|
||||
if (gpuMemory->isHostMemDirectAccess() &&
|
||||
|
||||
@@ -677,9 +677,8 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
|
||||
return false;
|
||||
}
|
||||
|
||||
//! @todo get the right value;
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ = 64;//dev().getAttribs().wavefrontSize;
|
||||
workGroupInfo_.wavefrontSize_ = dev().properties().gfxipProperties.shaderCore.wavefrontSize;
|
||||
// Find total workgroup size
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ =
|
||||
@@ -999,7 +998,7 @@ HSAILKernel::loadArguments(
|
||||
break;
|
||||
}
|
||||
|
||||
//! @todo 64 bit isn't supported with 32 bit binary
|
||||
//! 64 bit isn't supported with 32 bit binary
|
||||
uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
|
||||
WriteAqlArg(&aqlArgBuf, &globalAddress, sizeof(void*));
|
||||
|
||||
|
||||
@@ -151,14 +151,8 @@ Memory::create(
|
||||
reinterpret_cast<Resource::ViewParams*>(params);
|
||||
// Check if parent was allocated in system memory
|
||||
if ((view->resource_->memoryType() == Resource::Pinned) ||
|
||||
(((view->resource_->memoryType() == Resource::Remote) ||
|
||||
(view->resource_->memoryType() == Resource::RemoteUSWC)) &&
|
||||
// @todo Enable unconditional optimization for remote memory
|
||||
// Check for external allocation, to avoid the optimization
|
||||
// for non-VM (double copy) mode
|
||||
(owner() != nullptr) &&
|
||||
((owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) ||
|
||||
dev().settings().remoteAlloc_))) {
|
||||
(view->resource_->memoryType() == Resource::Remote) ||
|
||||
(view->resource_->memoryType() == Resource::RemoteUSWC)) {
|
||||
// Marks memory object for direct GPU access to the host memory
|
||||
flags_ |= HostMemoryDirectAccess;
|
||||
}
|
||||
@@ -578,10 +572,6 @@ Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
|
||||
}
|
||||
}
|
||||
|
||||
//!@todo A wait isn't really necessary. However
|
||||
//! Linux no-VM may have extra random failures.
|
||||
wait(gpu);
|
||||
|
||||
// Should never fail
|
||||
assert(result && "Memory synchronization failed!");
|
||||
}
|
||||
|
||||
@@ -237,7 +237,6 @@ HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStage
|
||||
aclType from = ACL_TYPE_DEFAULT;
|
||||
needOptionsCheck = true;
|
||||
size_t boolSize = sizeof(bool);
|
||||
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
||||
// Checking llvmir in .llvmir section
|
||||
bool containsSpirv = true;
|
||||
errorCode = aclQueryInfo(dev().compiler(), binaryElf_,
|
||||
@@ -375,7 +374,6 @@ HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options) {
|
||||
return continueCompileFrom;
|
||||
}
|
||||
bool recompile = false;
|
||||
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
||||
switch (continueCompileFrom) {
|
||||
case ACL_TYPE_HSAIL_BINARY:
|
||||
case ACL_TYPE_CG:
|
||||
|
||||
@@ -149,8 +149,9 @@ VirtualGPU::Queue::addCmdMemRef(Pal::IGpuMemory* iMem)
|
||||
void
|
||||
VirtualGPU::Queue::removeCmdMemRef(Pal::IGpuMemory* iMem)
|
||||
{
|
||||
memReferences_.erase(iMem);
|
||||
iDev_->RemoveGpuMemoryReferences(1, &iMem, iQueue_);
|
||||
if (0 != memReferences_.erase(iMem)) {
|
||||
iDev_->RemoveGpuMemoryReferences(1, &iMem, iQueue_);
|
||||
}
|
||||
}
|
||||
|
||||
uint
|
||||
@@ -2264,39 +2265,32 @@ VirtualGPU::submitMarker(amd::Marker& vcmd)
|
||||
GpuEvent*
|
||||
VirtualGPU::getGpuEvent(Pal::IGpuMemory* iMem)
|
||||
{
|
||||
GpuEvents::iterator it = gpuEvents_.find(iMem);
|
||||
if (it == gpuEvents_.end()) {
|
||||
// queue(MainEngine).addMemRef(iMem);
|
||||
// queue(SdmaEngine).addMemRef(iMem);
|
||||
}
|
||||
return &gpuEvents_[iMem];
|
||||
}
|
||||
|
||||
void
|
||||
VirtualGPU::assignGpuEvent(Pal::IGpuMemory* iMem, GpuEvent gpuEvent)
|
||||
{
|
||||
GpuEvents::iterator it = gpuEvents_.find(iMem);
|
||||
auto it = gpuEvents_.find(iMem);
|
||||
|
||||
if (it != gpuEvents_.end()) {
|
||||
it->second = gpuEvent;
|
||||
}
|
||||
else {
|
||||
// queue(gpuEvent.engineId_).addMemRef(iMem);
|
||||
gpuEvents_[iMem] = gpuEvent;
|
||||
}
|
||||
// queues_[gpuEvent.engineId_]->addCmdMemRef(iMem);
|
||||
}
|
||||
|
||||
void
|
||||
VirtualGPU::releaseMemory(Pal::IGpuMemory* iMem, bool wait)
|
||||
{
|
||||
auto it = gpuEvents_.find(iMem);
|
||||
//! @note if there is no wait, then it's a view release
|
||||
if (wait) {
|
||||
waitForEvent(&gpuEvents_[iMem]);
|
||||
//queue(MainEngine).removeMemRef(iMem);
|
||||
//queue(SdmaEngine).removeMemRef(iMem);
|
||||
if (wait && (it != gpuEvents_.end())) {
|
||||
waitForEvent(&it->second);
|
||||
queues_[MainEngine]->removeCmdMemRef(iMem);
|
||||
queues_[SdmaEngine]->removeCmdMemRef(iMem);
|
||||
gpuEvents_.erase(iMem);
|
||||
gpuEvents_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user