P4 to Git Change 1069294 by bsumner@bsumner-lin-opencl on 2014/08/22 10:56:33

ECR #304775 - fix bug 10248 where patching the local mem pointer hadn't been previously accounted for

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#22 edit
... //depot/stg/opencl/drivers/opencl/library/x86/common/src/misc/workitem.cl#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#36 edit
Этот коммит содержится в:
foreman
2014-08-22 11:05:20 -04:00
родитель 762e51bb71
Коммит 85d86251c1
2 изменённых файлов: 55 добавлений и 47 удалений
+14 -9
Просмотреть файл
@@ -28,8 +28,7 @@ WorkerThread::WorkerThread(const cpu::Device& device) :
{
localDataSize_ = (size_t) device.info().localMemSize_;
localDataStorage_ = (address) amd::AlignedMemory::allocate(
localDataSize_ + __CPU_SCRATCH_SIZE, sizeof(cl_long16)) +
__CPU_SCRATCH_SIZE;
localDataSize_ + __CPU_SCRATCH_SIZE, sizeof(cl_long16));
#if defined(__linux__) && defined(NUMA_SUPPORT)
const nodemask_t* numaMask = device.getNumaMask();
@@ -42,7 +41,7 @@ WorkerThread::WorkerThread(const cpu::Device& device) :
WorkerThread::~WorkerThread()
{
guarantee(Thread::current() != this && "thread suicide!");
amd::AlignedMemory::deallocate(localDataStorage_ - __CPU_SCRATCH_SIZE);
amd::AlignedMemory::deallocate(localDataStorage_);
}
bool
@@ -392,15 +391,16 @@ NDRangeKernelBatch::execute()
const size_t numWorkItems = command.sizes().local().product();
address params = thread.baseWorkItemsStack();
address localMemPtr = thread.localDataStorage();
if (!patchParameters(kernel,
params, localMemPtr, localMemPtr + thread.localDataSize(),
address baseLocalMemPtr = thread.localDataStorage();
address patchedLocalMemPtr = thread.localDataStorage() + __CPU_SCRATCH_SIZE;
if (!patchParameters(kernel, params,
patchedLocalMemPtr, patchedLocalMemPtr + thread.localDataSize(),
kernel.workGroupInfo()->localMemSize_)) {
return;
}
WorkItem* workItem0 = ::new((WorkItem*)params - 1) WorkItem(
command.sizes(), localMemPtr);
command.sizes(), baseLocalMemPtr, patchedLocalMemPtr);
WorkGroup wg(command, kernel, thread, params, workItem0, numWorkItems);
@@ -549,7 +549,9 @@ WorkGroup::callKernelRange(kernelentrypoint_t entryPoint,
}
}
WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* localMemPtr)
WorkItem::WorkItem(const amd::NDRangeContainer& sizes,
void* scratchMemPtr,
void* localMemPtr)
{
const amd::NDRange& local = sizes.local();
const amd::NDRange& global = sizes.global();
@@ -557,9 +559,11 @@ WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* localMemPtr)
const size_t dims = sizes.dimensions();
tib_.builtins = &Builtins::dispatchTable_;
tib_.work_dim = (cl_uint) sizes.dimensions();
tib_.local_mem_base = localMemPtr;
tib_.local_scratch = scratchMemPtr;
tib_.table_base = (const void *)cpuTables;
tib_.work_dim = (cl_uint) sizes.dimensions();
for (size_t i = 0; i < dims; ++i) {
tib_.global_offset[i] = offset[i];
tib_.global_size[i] = global[i];
@@ -568,6 +572,7 @@ WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* localMemPtr)
tib_.local_id[i] = 0;
tib_.group_id[i] = 0;
}
// Fill the remaining dimensions.
for (size_t i = dims; i < sizeof(tib_.global_size)/sizeof(size_t); ++i) {
tib_.global_offset[i] = 0;
+41 -38
Просмотреть файл
@@ -101,7 +101,7 @@ public:
};
protected:
amd::Command& command_;
amd::Command& command_;
public:
Operation(amd::Command& command) : command_(command)
@@ -113,7 +113,7 @@ public:
void cleanup();
amd::Command& command() { return command_;}
amd::Command& command() { return command_;}
virtual void execute() = 0;
};
@@ -136,7 +136,10 @@ private:
public:
//! Initialize this workgroup.
WorkItem(const amd::NDRangeContainer& size, void* localMemPtr);
WorkItem(
const amd::NDRangeContainer& size,
void* scratchMemPtr,
void* localMemPtr);
//! Return the current WorkItem (based on the current stack pointer).
static WorkItem* current() {
@@ -303,59 +306,59 @@ public:
class WorkerThread : public amd::Thread
{
private:
Fiber mainFiber_; //!< main fiber for this worker thread.
Fiber mainFiber_; //!< main fiber for this worker thread.
amd::Monitor queueLock_; //!< lock protecting the queue.
amd::Monitor queueLock_; //!< lock protecting the queue.
volatile int waitingOp_;
bool terminated_; //!< true if the thread is shutting down.
//! Local memory storage
address localDataStorage_;
//! Size of the local memory.
size_t localDataSize_;
bool terminated_; //!< true if the thread is shutting down.
//! Local memory storage
address localDataStorage_;
//! Size of the local memory.
size_t localDataSize_;
char operation_[MAX_OPERATION_ALLOC_SIZE];
address baseWorkItemsStack_;
private:
//! Await operations and execute them as they become ready.
void loop();
//! Await operations and execute them as they become ready.
void loop();
public:
//! Construct a new WorkerThread.
WorkerThread(const cpu::Device& device);
//! Destroy the worker thread.
virtual ~WorkerThread();
//! Cleanup the thread before termination.
bool terminate();
//! Construct a new WorkerThread.
WorkerThread(const cpu::Device& device);
//! Destroy the worker thread.
virtual ~WorkerThread();
//! Cleanup the thread before termination.
bool terminate();
//! Return the main fiber for this thread.
Fiber& mainFiber() { return mainFiber_; }
//! Return the LDS for this thread
address localDataStorage() const { return localDataStorage_; }
//! Return the size of the local memory for this thread.
size_t localDataSize() const { return localDataSize_; }
//! Return the main fiber for this thread.
Fiber& mainFiber() { return mainFiber_; }
//! Return the LDS for this thread
address localDataStorage() const { return localDataStorage_; }
//! Return the size of the local memory for this thread.
size_t localDataSize() const { return localDataSize_; }
address baseWorkItemsStack() { return baseWorkItemsStack_; }
Operation* operation() { return reinterpret_cast<Operation*>(operation_); }
bool isOperationValid() { return waitingOp_ > 0; }
//! Enqueue a new operation to execute in this thread.
void enqueue(Operation& op);
//! Signal to start processing the commands in the queue.
void flush() { amd::ScopedLock sl(queueLock_); queueLock_.notify(); }
//! Enqueue a new operation to execute in this thread.
void enqueue(Operation& op);
//! Signal to start processing the commands in the queue.
void flush() { amd::ScopedLock sl(queueLock_); queueLock_.notify(); }
//! This thread's execution engine.
void run(void* data) {
loop();
}
//! This thread's execution engine.
void run(void* data) {
loop();
}
//! Return the currently executing WorkerThread's instance.
static WorkerThread* current()
{
return static_cast<WorkerThread*>(Thread::current());
}
//! Return the currently executing WorkerThread's instance.
static WorkerThread* current()
{
return static_cast<WorkerThread*>(Thread::current());
}
};
/*! @}