Files
rocm-systems/rocclr/runtime/device/cpu/cpucommand.hpp
T
foreman 024acc392e P4 to Git Change 1102693 by jsjodin@jsjodin_linux_avx_hsa on 2014/12/04 17:11:26
ECR #304775 - Change the way SIGFPE is handled.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#14 edit
2014-12-04 17:19:31 -05:00

430 строки
11 KiB
C++

//
// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef OPERATION_HPP_
#define OPERATION_HPP_
#include "top.hpp"
#include "device/cpu/cpudevice.hpp"
#include "device/cpu/cpukernel.hpp"
#include "platform/command.hpp"
#include "thread/thread.hpp"
#include "os/os.hpp"
#include "amdocl/cl_kernel.h"
#if defined(ATI_ARCH_ARM)
#include <setjmp.h>
#endif // ATI_ARCH_ARM
namespace cpu {
/*! \addtogroup CPU
* @{
*
* \addtogroup CPUExec Execution environment
* @{
*/
//! A saved stack context
//!
//! Holds the buffer that setjmp() fills and longjmp() resumes from. The
//! buffer type depends on the build: a C library jmp_buf on ARM, otherwise an
//! array of intptr_t that is passed to the external _StackContext_setjmp /
//! _StackContext_longjmp routines (see the inline definitions later in this
//! file). Those routines are not visible here, so the array sizes below must
//! be treated as part of their contract.
class StackContext : public amd::StackObject
{
private:
#if defined(ATI_ARCH_ARM)
// ARM: buffer for the C library ::setjmp / ::longjmp.
jmp_buf env_;
#elif defined(_WIN64)
// Win64: 32 pointer-sized slots, 16-byte aligned.
intptr_t __declspec(align(16)) regs_[32];
#else // !_WIN64
// Everything else: LP64_SWITCH(6,8) slots -- presumably 6 on 32-bit and 8 on
// 64-bit targets; confirm against the LP64_SWITCH definition.
intptr_t regs_[LP64_SWITCH(6,8)];
#endif // !_WIN64
public:
//! Save the stack context. Return 0 if returning directly.
//! A non-zero result presumably means the context was resumed through
//! longjmp() -- the non-ARM routine is defined outside this file.
inline intptr_t setjmp();
//! Restore the stack context
//! \param val value forwarded to the underlying longjmp routine.
inline void longjmp(intptr_t val) const;
};
//! A thread fiber: a saved execution context plus a link to the next fiber
//! of the same thread.
class Fiber : public amd::StackObject
{
private:
    Fiber* next_;           //!< Next fiber in the thread.
    StackContext context_;  //!< This fiber's saved state.

public:
    //! Construct a new Fiber that has no successor yet.
    Fiber() : next_(NULL) { }

    //! Return the next fiber in the current thread.
    const Fiber* next() const
    {
        return next_;
    }

    //! Set the next fiber in the current thread.
    void setNext(Fiber* next)
    {
        next_ = next;
    }

    //! Save the state of this fiber. Return true if directly returning
    //! (i.e. the underlying StackContext::setjmp() yielded 0).
    ALWAYSINLINE bool save()
    {
        return 0 == context_.setjmp();
    }

    //! Restore this fiber from the saved context (longjmp with value 1).
    void restore() const
    {
        context_.longjmp(1);
    }

    //! Switch to the given fiber: save this fiber, then resume the other one.
    void swap(const Fiber* fiber)
    {
        if (!save()) {
            // save() reported a non-direct return: nothing more to do.
            return;
        }
        fiber->restore();
    }
};
//! A CPU core operation (enqueued in the worker thread queue).
//!
//! Abstract base: subclasses provide clone() and execute().
class Operation : public amd::HeapObject
{
public:
    //! An atomic counter paired with the event it belongs to.
    class Counter
    {
        // FIXME_lmoriche: recycle the counters, implement a thread local pool.
    private:
        amd::Event& event_;            //!< Event associated with this counter.
        amd::Atomic<size_t> counter_;  //!< The atomic counter value.

    public:
        //! Initialize the counter with the given initial value.
        Counter(amd::Event& event, size_t initialValue)
            : event_(event)
            , counter_(initialValue)
        { }

        //! Return the event associated with this counter.
        amd::Event& event()
        {
            return event_;
        }

        //! Decrement the counter and return the new value.
        size_t decrement()
        {
            return --counter_;
        }
    };

protected:
    amd::Command& command_;  //!< The command passed at construction.

public:
    //! Bind the operation to \a command (held by reference, not copied).
    Operation(amd::Command& command)
        : command_(command)
    { }

    virtual ~Operation() { }

    //! Build a copy of this operation in the storage pointed to by \a buf.
    //! The subclasses in this file do so with placement new.
    virtual void clone(Operation* buf) = 0;

    //! Defined out of line.
    void cleanup();

    //! Return the command this operation was constructed with.
    amd::Command& command()
    {
        return command_;
    }

    //! Run the operation; implemented by each subclass.
    virtual void execute() = 0;
};
/*! @}
* \defgroup CPUOperations Operations
* @{
*/
//! A work item instance
//!
//! Extends Fiber with the per-work-item thread info block. current() and
//! nativeStackPtr() compute addresses from where this object sits in memory,
//! so both the member layout and the object's placement are part of the
//! contract.
class WorkItem : public Fiber
{
private:
//! Thread info block (must be the last field).
clk_thread_info_block_t tib_;
private:
//! Cannot be deleted (allocated with placement new).
//! Kept private; calls ShouldNotCallThis() if it is ever reached.
void operator delete(void*) { ShouldNotCallThis(); }
public:
//! Initialize this work item.
//! Defined out of line; the parameters are presumably the NDRange sizes and
//! the scratch / local memory base pointers for the item -- confirm there.
WorkItem(
const amd::NDRangeContainer& size,
void* scratchMemPtr,
void* localMemPtr);
//! Return the current WorkItem (based on the current stack pointer).
//! The stack pointer is rounded up to a CLK_PRIVATE_MEMORY_SIZE boundary and
//! cast to WorkItem* before the "- 1", so the result is the WorkItem-sized
//! slot immediately below that boundary.
//! NOTE(review): this presumes each work item's stack lives in a
//! CLK_PRIVATE_MEMORY_SIZE-aligned region with the WorkItem at its top --
//! confirm with the code that allocates the stacks.
static WorkItem* current() {
return (WorkItem*)amd::alignUp((intptr_t) amd::Os::currentStackPtr(),
CLK_PRIVATE_MEMORY_SIZE) - 1;
}
//! Return the thread info block of this work item.
clk_thread_info_block_t& infoBlock() { return tib_; }
//! Return the native stack pointer base for this workitem.
//! Starts CPUKERNEL_STACK_ALIGN bytes below this object and aligns that
//! address down to CPUKERNEL_STACK_ALIGN.
address nativeStackPtr() const {
address newSp = amd::alignDown((address) this - CPUKERNEL_STACK_ALIGN,
CPUKERNEL_STACK_ALIGN);
// Extra pointer-sized bump wrapped in WINDOWS_ONLY(NOT_WIN64(...)):
// presumably kept only for 32-bit Windows builds -- the macros are defined
// elsewhere, confirm there.
WINDOWS_ONLY(NOT_WIN64(newSp += sizeof(void*)));
return newSp;
}
//! These functions map "n" from a 1D index to the required dimension.
//! Both are declared inline and defined outside this class body.
inline void setGroupId(
const amd::NDRange& rangeLimits,
const amd::NDRange& offset,
size_t n);
inline void incrementGroupId(
const amd::NDRange& rangeLimits,
const amd::NDRange& offset,
size_t n);
//! Execute a thread synchronization barrier.
static void barrier(cl_mem_fence_flags flags);
};
//! Pointer to a kernel entry point: a function that takes a single
//! const void* and returns nothing. Values of this type are handed to the
//! _WorkGroup_callKernel* routines by WorkGroup.
typedef void (*kernelentrypoint_t)(const void*);
//! Execute a workgroup (work-items).
//!
//! Groups the state needed to run the work-items of one workgroup: the
//! NDRange command, the kernel, the owning worker thread, the parameter
//! block, the first work item and the number of work-items.
class WorkGroup
{
private:
    amd::NDRangeKernelCommand& command_;  //!< Command given at construction.
    const cpu::Kernel& kernel_;           //!< Kernel given at construction.
    WorkerThread& thread_;                //!< Worker thread given at construction.
    address params_;                      //!< First argument of the kernel-call routines.
    WorkItem* const workItem0_;           //!< Base (first) work item.
    const Fiber* workingFiber_;           //!< Not set by callers of the constructor; starts as NULL.
    size_t numWorkItems_;                 //!< Number of work-items in the group.

public:
    //! Construct a workgroup executor.
    //! \param parent        the NDRange kernel command
    //! \param kernel        the CPU kernel
    //! \param thread        the worker thread
    //! \param params        parameter block forwarded to the kernel-call routines
    //! \param workItem0     base work item
    //! \param numWorkItems  initial number of work-items
    WorkGroup(
        amd::NDRangeKernelCommand& parent,
        const cpu::Kernel& kernel,
        WorkerThread& thread,
        address params,
        WorkItem* workItem0,
        const size_t numWorkItems) :
        command_(parent),
        kernel_(kernel),
        thread_(thread),
        params_(params),
        workItem0_(workItem0),
        // Previously left uninitialised; give it a determinate value so a
        // read before the first assignment cannot see a garbage pointer.
        workingFiber_(NULL),
        numWorkItems_(numWorkItems)
    { }

    //! Return the base work item passed at construction.
    WorkItem* getBaseWorkItem() { return workItem0_; }

    //! Return the worker thread passed at construction.
    WorkerThread& getWorkerThread() { return thread_; }

    void executeWorkItem(); // In case of 1 WorkItem
    void executeWithBarrier();
    void executeWithoutBarrier();

    //! Override the number of work-items in this group.
    void setNumWorkItems(size_t workItems) { numWorkItems_ = workItems; }

    //! Return the number of work-items in this group.
    size_t getNumWorkItems() { return numWorkItems_; }

private:
    //! Defined out of line.
    void callKernelRange(
        kernelentrypoint_t entryPoint,
        address stackPtr,
        clk_thread_info_block_t& tib);

    //! Forward to _WorkGroup_callKernel with this group's parameter block.
    inline void callKernel(
        kernelentrypoint_t entryPoint,
        address stackPtr);

    //! Forward to _WorkGroup_callKernelProtectedReturn with this group's
    //! parameter block.
    inline void callKernelProtectedReturn(
        kernelentrypoint_t entryPoint,
        address stackPtr);
};
//! Operation wrapping an amd::NDRangeKernelCommand for execution on a core.
class NDRangeKernelBatch : public Operation
{
protected:
    size_t coreId_;                //!< Core id; 0 until setCoreId() is called.
    const size_t numWorkGroups_;   //!< groupIds.product() at construction.
    const size_t numCores_;        //!< Core count given at construction.
    volatile size_t currentOpId_;  //!< Starts at 0; reset to the core id by setCoreId().
    const amd::NDRange groupIds_;  //!< Number of groups in each dimension.
    VirtualCPU& virtualDevice_;    //!< Virtual device given at construction.

public:
    //! Execution order; ORDER_ROUND_ROBIN is an alias of ORDER_DEFAULT.
    enum ExecutionOrder {
        ORDER_DEFAULT,
        ORDER_ROUND_ROBIN = ORDER_DEFAULT,
        //ORDER_LINEAR
    };

    enum ExecutionNature {
        NATURE_WITH_BARRIER,
        NATURE_WITHOUT_BARRIER,
        NATURE_1_WORK_ITEM,
        NATURE_WG_LEVEL_EXEC
    };

    //! Create a batch for \a parent covering \a groupIds workgroups.
    NDRangeKernelBatch(
        amd::NDRangeKernelCommand& parent,
        VirtualCPU& virtualDevice,
        const amd::NDRange& groupIds,
        size_t numCores)
        : Operation(parent)
        , coreId_(0)
        , numWorkGroups_(groupIds.product())
        , numCores_(numCores)
        , currentOpId_(0)
        , groupIds_(groupIds)
        , virtualDevice_(virtualDevice)
    { }

    //! Placement-construct an equivalent batch in \a buf, then give the copy
    //! this batch's core id (which also resets the copy's current op id).
    virtual void clone(Operation* buf)
    {
        ::new(buf) NDRangeKernelBatch(
            static_cast<amd::NDRangeKernelCommand&>(command_),
            virtualDevice_,
            groupIds_,
            numCores_);

        NDRangeKernelBatch* const copy = static_cast<NDRangeKernelBatch*>(buf);
        copy->setCoreId(coreId_);
    }

    virtual void execute();

    //! Set the core id and restart the current operation id from it.
    void setCoreId(size_t coreId)
    {
        coreId_ = coreId;
        currentOpId_ = coreId;
    }

    // Declared inline; defined outside this class body.
    inline bool getNextOperationId(size_t& opId);
    inline size_t getNextOperationIds(size_t& opId, size_t count);

private:
    //! Defined out of line.
    bool patchParameters(
        const cpu::Kernel& kernel,
        address params,
        address& localMemPtr,
        const address localMemLimit,
        size_t localMemSize) const;
};
//! Operation wrapping an amd::NativeFnCommand.
class NativeFn : public Operation
{
public:
    //! Bind the operation to \a parent.
    NativeFn(amd::NativeFnCommand& parent)
        : Operation(parent)
    { }

    //! Placement-construct a NativeFn for the same command in \a buf.
    virtual void clone(Operation* buf)
    {
        ::new(buf) NativeFn(
            static_cast<amd::NativeFnCommand&>(command_));
    }

    virtual void execute();
};
//! Larger of two values; yields the first argument when they compare equal.
//! The selected argument is evaluated twice, so avoid side effects.
#if !defined(MAX)
#define MAX(x, y) (((x) >= (y)) ? (x) : (y))
#endif // !defined(MAX)

//! Size in bytes of the larger of the two Operation subclasses declared
//! above (NDRangeKernelBatch and NativeFn).
#define MAX_OPERATION_ALLOC_SIZE \
    (MAX(sizeof(NDRangeKernelBatch), sizeof(NativeFn)))
//! A thread bound to a cpu core.
class WorkerThread : public amd::Thread
{
private:
    Fiber mainFiber_;          //!< main fiber for this worker thread.
    amd::Monitor queueLock_;   //!< lock protecting the queue.
    volatile int waitingOp_;   //!< an operation is considered valid while this is > 0.
    bool terminated_;          //!< true if the thread is shutting down.
    address localDataStorage_; //!< Local memory storage.
    size_t localDataSize_;     //!< Size of the local memory.

    //! Raw storage sized for the larger Operation subclass; operation()
    //! reinterprets it as an Operation.
    //! NOTE(review): a plain char array has no alignment requirement of its
    //! own -- confirm objects constructed here end up suitably aligned.
    char operation_[MAX_OPERATION_ALLOC_SIZE];

    address baseWorkItemsStack_;

private:
    //! Awaits operations and execute them as they become ready.
    void loop();

public:
    //! Construct a new WorkerThread.
    WorkerThread(const cpu::Device& device);

    //! Destroy the worker thread.
    virtual ~WorkerThread();

    //! Cleanup the thread before termination.
    bool terminate();

    //! Return the main fiber for this thread.
    Fiber& mainFiber()
    {
        return mainFiber_;
    }

    //! Return the LDS for this thread
    address localDataStorage() const
    {
        return localDataStorage_;
    }

    //! Return the size of the local memory for this thread.
    size_t localDataSize() const
    {
        return localDataSize_;
    }

    //! Return the base address of the work-items stack.
    address baseWorkItemsStack()
    {
        return baseWorkItemsStack_;
    }

    //! View the embedded operation storage as an Operation.
    Operation* operation()
    {
        return reinterpret_cast<Operation*>(operation_);
    }

    //! True while waitingOp_ is positive.
    bool isOperationValid()
    {
        return waitingOp_ > 0;
    }

    //! Enqueue a new operation to execute in this thread.
    void enqueue(Operation& op);

    //! Signal to start processing the commands in the queue.
    //! Takes the queue lock for the duration of the notify.
    void flush()
    {
        amd::ScopedLock sl(queueLock_);
        queueLock_.notify();
    }

    //! This thread's execution engine; \a data is ignored.
    void run(void* data)
    {
        loop();
    }

    bool isWorkerThread() const
    {
        return true;
    }

    //! Return the currently executing WorkerThread's instance.
    //! The result of Thread::current() is downcast without a check.
    static WorkerThread* current()
    {
        return static_cast<WorkerThread*>(Thread::current());
    }
};
/*! @}
* @}
*/
//! Low-level context save routine with C linkage, defined outside this file.
//! Declared unconditionally, but only called by non-ARM builds.
extern "C" intptr_t _StackContext_setjmp(intptr_t* regs);
//! Save the current stack context into this object.
//!
//! ARM builds call the C library ::setjmp on env_; every other build calls
//! _StackContext_setjmp on regs_. The result of the underlying call is
//! returned unchanged (0 when returning directly, per the class declaration).
//!
//! Non-ARM builds force inlining with ALWAYSINLINE -- presumably so the
//! context is captured in the caller's frame; TODO confirm.
//! NOTE(review): on ARM the function is only declared 'inline'. ISO C leaves
//! longjmp undefined once the function that invoked setjmp has returned, so
//! confirm this wrapper is reliably inlined on ARM builds.
#if !defined(ATI_ARCH_ARM)
ALWAYSINLINE
#endif
intptr_t
StackContext::setjmp()
{
#if defined(ATI_ARCH_ARM)
return ::setjmp(env_);
#else
return _StackContext_setjmp(regs_);
#endif
}
//! Low-level context restore routine with C linkage, defined outside this file.
extern "C" void _StackContext_longjmp(const intptr_t* env, intptr_t val);
//! Resume execution from the context stored in this object.
//!
//! \param val value handed to the underlying longjmp routine
//!            (Fiber::restore() passes 1).
//!
//! ARM builds call the C library ::longjmp; constness of env_ is cast away
//! because this member function is const, and val is implicitly converted to
//! the int that ::longjmp takes. Other builds call _StackContext_longjmp on
//! regs_.
ALWAYSINLINE void
StackContext::longjmp(intptr_t val) const
{
#if defined(ATI_ARCH_ARM)
return ::longjmp(*const_cast<jmp_buf*>(&env_), val);
#else
return _StackContext_longjmp(regs_, val);
#endif
}
//! Kernel-call routine with C linkage, defined outside this file.
//! WorkGroup::callKernel() passes its parameter block, the kernel entry
//! point and a stack pointer, in that order.
extern "C" void _WorkGroup_callKernel(
address params,
kernelentrypoint_t entryPoint,
address stackPtr);
//! Variant used by WorkGroup::callKernelProtectedReturn(); same argument
//! order as _WorkGroup_callKernel. Also defined outside this file.
extern "C" void _WorkGroup_callKernelProtectedReturn(
address params,
kernelentrypoint_t entryPoint,
address stackPtr);
//! Call the kernel entry point through _WorkGroup_callKernel, supplying this
//! workgroup's parameter block as the first argument.
ALWAYSINLINE void WorkGroup::callKernel(kernelentrypoint_t entryPoint, address stackPtr)
{
    _WorkGroup_callKernel(
        params_,
        entryPoint,
        stackPtr);
}
// This version supports the case of changing the stack for fibers.
//! Call the kernel entry point through _WorkGroup_callKernelProtectedReturn,
//! supplying this workgroup's parameter block as the first argument.
ALWAYSINLINE void WorkGroup::callKernelProtectedReturn(kernelentrypoint_t entryPoint, address stackPtr)
{
    _WorkGroup_callKernelProtectedReturn(
        params_,
        entryPoint,
        stackPtr);
}
} // namespace cpu
#endif /*OPERATION_HPP_*/