P4 to Git Change 1069294 by bsumner@bsumner-lin-opencl on 2014/08/22 10:56:33
ECR #304775 - Fix bug 10248, where patching of the local memory pointer had not previously been accounted for.

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#22 edit
... //depot/stg/opencl/drivers/opencl/library/x86/common/src/misc/workitem.cl#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#36 edit
Этот коммит содержится в:
@@ -28,8 +28,7 @@ WorkerThread::WorkerThread(const cpu::Device& device) :
|
||||
{
|
||||
localDataSize_ = (size_t) device.info().localMemSize_;
|
||||
localDataStorage_ = (address) amd::AlignedMemory::allocate(
|
||||
localDataSize_ + __CPU_SCRATCH_SIZE, sizeof(cl_long16)) +
|
||||
__CPU_SCRATCH_SIZE;
|
||||
localDataSize_ + __CPU_SCRATCH_SIZE, sizeof(cl_long16));
|
||||
|
||||
#if defined(__linux__) && defined(NUMA_SUPPORT)
|
||||
const nodemask_t* numaMask = device.getNumaMask();
|
||||
@@ -42,7 +41,7 @@ WorkerThread::WorkerThread(const cpu::Device& device) :
|
||||
WorkerThread::~WorkerThread()
|
||||
{
|
||||
guarantee(Thread::current() != this && "thread suicide!");
|
||||
amd::AlignedMemory::deallocate(localDataStorage_ - __CPU_SCRATCH_SIZE);
|
||||
amd::AlignedMemory::deallocate(localDataStorage_);
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -392,15 +391,16 @@ NDRangeKernelBatch::execute()
|
||||
const size_t numWorkItems = command.sizes().local().product();
|
||||
|
||||
address params = thread.baseWorkItemsStack();
|
||||
address localMemPtr = thread.localDataStorage();
|
||||
if (!patchParameters(kernel,
|
||||
params, localMemPtr, localMemPtr + thread.localDataSize(),
|
||||
address baseLocalMemPtr = thread.localDataStorage();
|
||||
address patchedLocalMemPtr = thread.localDataStorage() + __CPU_SCRATCH_SIZE;
|
||||
if (!patchParameters(kernel, params,
|
||||
patchedLocalMemPtr, patchedLocalMemPtr + thread.localDataSize(),
|
||||
kernel.workGroupInfo()->localMemSize_)) {
|
||||
return;
|
||||
}
|
||||
|
||||
WorkItem* workItem0 = ::new((WorkItem*)params - 1) WorkItem(
|
||||
command.sizes(), localMemPtr);
|
||||
command.sizes(), baseLocalMemPtr, patchedLocalMemPtr);
|
||||
|
||||
WorkGroup wg(command, kernel, thread, params, workItem0, numWorkItems);
|
||||
|
||||
@@ -549,7 +549,9 @@ WorkGroup::callKernelRange(kernelentrypoint_t entryPoint,
|
||||
}
|
||||
}
|
||||
|
||||
WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* localMemPtr)
|
||||
WorkItem::WorkItem(const amd::NDRangeContainer& sizes,
|
||||
void* scratchMemPtr,
|
||||
void* localMemPtr)
|
||||
{
|
||||
const amd::NDRange& local = sizes.local();
|
||||
const amd::NDRange& global = sizes.global();
|
||||
@@ -557,9 +559,11 @@ WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* localMemPtr)
|
||||
const size_t dims = sizes.dimensions();
|
||||
|
||||
tib_.builtins = &Builtins::dispatchTable_;
|
||||
tib_.work_dim = (cl_uint) sizes.dimensions();
|
||||
tib_.local_mem_base = localMemPtr;
|
||||
tib_.local_scratch = scratchMemPtr;
|
||||
tib_.table_base = (const void *)cpuTables;
|
||||
tib_.work_dim = (cl_uint) sizes.dimensions();
|
||||
|
||||
for (size_t i = 0; i < dims; ++i) {
|
||||
tib_.global_offset[i] = offset[i];
|
||||
tib_.global_size[i] = global[i];
|
||||
@@ -568,6 +572,7 @@ WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* localMemPtr)
|
||||
tib_.local_id[i] = 0;
|
||||
tib_.group_id[i] = 0;
|
||||
}
|
||||
|
||||
// Fill the remaining dimensions.
|
||||
for (size_t i = dims; i < sizeof(tib_.global_size)/sizeof(size_t); ++i) {
|
||||
tib_.global_offset[i] = 0;
|
||||
|
||||
@@ -101,7 +101,7 @@ public:
|
||||
};
|
||||
|
||||
protected:
|
||||
amd::Command& command_;
|
||||
amd::Command& command_;
|
||||
|
||||
public:
|
||||
Operation(amd::Command& command) : command_(command)
|
||||
@@ -113,7 +113,7 @@ public:
|
||||
|
||||
void cleanup();
|
||||
|
||||
amd::Command& command() { return command_;}
|
||||
amd::Command& command() { return command_;}
|
||||
|
||||
virtual void execute() = 0;
|
||||
};
|
||||
@@ -136,7 +136,10 @@ private:
|
||||
|
||||
public:
|
||||
//! Initialize this workgroup.
|
||||
WorkItem(const amd::NDRangeContainer& size, void* localMemPtr);
|
||||
WorkItem(
|
||||
const amd::NDRangeContainer& size,
|
||||
void* scratchMemPtr,
|
||||
void* localMemPtr);
|
||||
|
||||
//! Return the current WorkItem (based of the current stack pointer).
|
||||
static WorkItem* current() {
|
||||
@@ -303,59 +306,59 @@ public:
|
||||
class WorkerThread : public amd::Thread
|
||||
{
|
||||
private:
|
||||
Fiber mainFiber_; //!< main fiber for this worker thread.
|
||||
Fiber mainFiber_; //!< main fiber for this worker thread.
|
||||
|
||||
amd::Monitor queueLock_; //!< lock protecting the queue.
|
||||
amd::Monitor queueLock_; //!< lock protecting the queue.
|
||||
volatile int waitingOp_;
|
||||
bool terminated_; //!< true if the thread is shutting down.
|
||||
|
||||
//! Local memory storage
|
||||
address localDataStorage_;
|
||||
//! Size of the local memory.
|
||||
size_t localDataSize_;
|
||||
bool terminated_; //!< true if the thread is shutting down.
|
||||
|
||||
//! Local memory storage
|
||||
address localDataStorage_;
|
||||
//! Size of the local memory.
|
||||
size_t localDataSize_;
|
||||
|
||||
char operation_[MAX_OPERATION_ALLOC_SIZE];
|
||||
|
||||
address baseWorkItemsStack_;
|
||||
private:
|
||||
//! Awaits operations and execute them as they become ready.
|
||||
void loop();
|
||||
//! Awaits operations and execute them as they become ready.
|
||||
void loop();
|
||||
|
||||
public:
|
||||
//! Construct a new WorkerThread.
|
||||
WorkerThread(const cpu::Device& device);
|
||||
//! Destroy the worker thread.
|
||||
virtual ~WorkerThread();
|
||||
//! Cleanup the thread before termination.
|
||||
bool terminate();
|
||||
//! Construct a new WorkerThread.
|
||||
WorkerThread(const cpu::Device& device);
|
||||
//! Destroy the worker thread.
|
||||
virtual ~WorkerThread();
|
||||
//! Cleanup the thread before termination.
|
||||
bool terminate();
|
||||
|
||||
//! Return the main fiber for this thread.
|
||||
Fiber& mainFiber() { return mainFiber_; }
|
||||
//! Return the LDS for this thread
|
||||
address localDataStorage() const { return localDataStorage_; }
|
||||
//! Return the size of the local memory for this thread.
|
||||
size_t localDataSize() const { return localDataSize_; }
|
||||
//! Return the main fiber for this thread.
|
||||
Fiber& mainFiber() { return mainFiber_; }
|
||||
//! Return the LDS for this thread
|
||||
address localDataStorage() const { return localDataStorage_; }
|
||||
//! Return the size of the local memory for this thread.
|
||||
size_t localDataSize() const { return localDataSize_; }
|
||||
|
||||
address baseWorkItemsStack() { return baseWorkItemsStack_; }
|
||||
|
||||
Operation* operation() { return reinterpret_cast<Operation*>(operation_); }
|
||||
bool isOperationValid() { return waitingOp_ > 0; }
|
||||
|
||||
//! Enqueue a new operation to execute in this thread.
|
||||
void enqueue(Operation& op);
|
||||
//! Signal to start processing the commands in the queue.
|
||||
void flush() { amd::ScopedLock sl(queueLock_); queueLock_.notify(); }
|
||||
//! Enqueue a new operation to execute in this thread.
|
||||
void enqueue(Operation& op);
|
||||
//! Signal to start processing the commands in the queue.
|
||||
void flush() { amd::ScopedLock sl(queueLock_); queueLock_.notify(); }
|
||||
|
||||
//! This thread's execution engine.
|
||||
void run(void* data) {
|
||||
loop();
|
||||
}
|
||||
//! This thread's execution engine.
|
||||
void run(void* data) {
|
||||
loop();
|
||||
}
|
||||
|
||||
//! Return the currently executing WorkerThread's instance.
|
||||
static WorkerThread* current()
|
||||
{
|
||||
return static_cast<WorkerThread*>(Thread::current());
|
||||
}
|
||||
//! Return the currently executing WorkerThread's instance.
|
||||
static WorkerThread* current()
|
||||
{
|
||||
return static_cast<WorkerThread*>(Thread::current());
|
||||
}
|
||||
};
|
||||
|
||||
/*! @}
|
||||
|
||||
Ссылка в новой задаче
Block a user