Files
rocm-systems/rocclr/runtime/device/cpu/cpudevice.hpp
T
foreman 539fef47eb P4 to Git Change 1208929 by emankov@em-hsa-amd on 2015/11/09 10:49:06
SWDEV-77584 - ORCA RT: Preparations for enabling HSAIL on OpenCL 1.2 by default. Integrate new algorithm for device program choice.

	[Reasons]
	1. Keep the switching change as small as possible.
	2. Give a chance to test HSA_foundation device work on OCL 1.2 beforehand (asked by Nikolay).

	Almost already reviewed:
	http://ocltc.amd.com/reviews/r/8850/

	Additionally:
	1. Linking logic was changed: if the target of one of the binaries is hsail-(64), linking goes through HSAIL; otherwise it goes through AMDIL. Previously, -cl-std=CL2.0 in any of the linking binaries was the criterion for HSAIL, which will be wrong for HSAIL 1.2 after switching. The -clang & -edg options are now set to distinguish the path while linking.
	2. -cl-std=CL2.0 as a criterion for HSAIL was restored in the isHSAILProgram() method; the -clang & -edg options were also added as criteria.

	[ToDo] After enabling HSAIL by default remove -cl-std, -clang & -edg checks from the code.

	[Testing] Pre-checkin
	http://ocltc.amd.com:8111/viewModification.html?modId=61929&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true

	[Reviewers] German Andryeyev, Nikolay Haustov

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#279 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#93 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#261 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#534 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#154 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#38 edit
2015-11-09 10:56:13 -05:00

243 lines
6.8 KiB
C++

//
// Copyright (c) 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef CPUDEVICE_HPP_
#define CPUDEVICE_HPP_
#include "top.hpp"
#include "device/device.hpp"
#include "device/cpu/cpuvirtual.hpp"
#include "device/cpu/cpusettings.hpp"
#include "os/os.hpp"
#if defined(__linux__) && defined(NUMA_SUPPORT)
#include <numa.h>
#endif
#include "acl.h"
//! \namespace cpu CPU Device Implementation
namespace cpu {
//! Upper bound on the number of samplers supported
static const uint32_t MaxSamplers = 16;
//! Upper bound on the number of read images supported
static const uint32_t MaxReadImage = 128;
//! Upper bound on the number of write images supported
static const uint32_t MaxWriteImage = 64;
//! Upper bound on the number of read/write images supported
static const uint32_t MaxReadWriteImage = 64;
/*! \addtogroup CPU CPU Device Implementation
* @{
*
* \addtogroup CPUDevice Device
*
* \copydoc cpu::Device
*
* @{
*/
//! A CPU device ordinal
class Device : public amd::Device
{
protected:
static aclCompiler* compiler_;
public:
aclCompiler* compiler() const { return compiler_; }
public:
static bool init(void);
//! Shutdown CPU device
static void tearDown();
//! Construct a new identifier
Device(Device* parent = NULL) :
amd::Device(parent),
workerThreadsAffinity_(NULL)
{}
virtual ~Device();
bool create();
virtual cl_int createSubDevices(
device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices);
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(
amd::CommandQueue* queue = NULL
)
{
VirtualCPU* virtualCpu = new VirtualCPU(*this);
if (virtualCpu != NULL && !virtualCpu->acceptingCommands()) {
virtualCpu->terminate();
delete virtualCpu;
virtualCpu = NULL;
}
return virtualCpu;
}
//! Compile the given source code.
virtual device::Program* createProgram(amd::option::Options* options = NULL);
//! Just returns NULL as CPU devices use the host memory
virtual device::Memory* createMemory(amd::Memory& owner) const
{
return NULL;
}
//! Sampler object allocation
virtual bool createSampler(
const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const
{
// Just return NULL on CPU device
*sampler = NULL;
return true;
}
//! Reallocates device memory obje
virtual bool reallocMemory(amd::Memory& owner) const
{
return true;
}
//! Just returns NULL as CPU devices use the host memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const
{
return NULL;
}
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
//! Return true if initialized interoperability, otherwise false
virtual bool bindExternalDevice(intptr_t type, void* pDevice, void* pContext, bool validateOnly)
{
return true; // On CPU always avail if pD3DDevice is not NULL
}
virtual bool unbindExternalDevice(intptr_t type, void* pDevice, void* pContext, bool validateOnly)
{
return true;
}
//! Gets a pointer to a region of host-visible memory for use as the target
//! of a non-blocking map for a given memory object
virtual void* allocMapTarget(
amd::Memory& mem, //!< Abstraction layer memory object
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
//! Releases non-blocking map target memory
virtual void freeMapTarget(amd::Memory& mem, void* target);
//! Empty implementation on a CPU device
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
//! Get CPU device settings
const cpu::Settings& settings() const
{ return reinterpret_cast<cpu::Settings&>(*settings_); }
bool hasAVXInstructions() const
{ return (settings().cpuFeatures_ & Settings::AVXInstructions) ? true : false; }
bool hasFMA4Instructions() const
{ return (settings().cpuFeatures_ & Settings::FMA4Instructions) ? true : false; }
static size_t getMaxWorkerThreadsNumber() { return maxWorkerThreads_; }
void setWorkerThreadsAffinity(
cl_uint numWorkerThreads,
const amd::Os::ThreadAffinityMask* threadsAffinityMask,
uint& baseCoreId);
const amd::Os::ThreadAffinityMask* getWorkerThreadsAffinity() const
{
return workerThreadsAffinity_;
}
//! host memory alloc
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const
{
return NULL;
}
//! host memory deallocation
virtual void svmFree(void* ptr) const
{
return;
}
private:
bool initSubDevice(
device::Info& info,
cl_uint maxComputeUnits,
const device::CreateSubDevicesInfo& create_info);
cl_int partitionEqually(
const device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices);
cl_int partitionByCounts(
const device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices);
cl_int partitionByAffinityDomainNUMA(
const device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices);
cl_int partitionByAffinityDomainCacheLevel(
const device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices);
private:
#if defined(__linux__) && defined(NUMA_SUPPORT)
public:
const nodemask_t* getNumaMask() const
{
return (info_.partitionCreateInfo_.type_ == device::PartitionType::BY_AFFINITY_DOMAIN &&
info_.partitionCreateInfo_.byAffinityDomain_.numa_) ?
numaMask_ : NULL;
}
private:
union {
nodemask_t* numaMask_;
amd::Os::ThreadAffinityMask* workerThreadsAffinity_; //!< As the number of compute units.
};
#else
amd::Os::ThreadAffinityMask* workerThreadsAffinity_; //!< As the number of compute units.
#endif
static size_t maxWorkerThreads_; //!< Maximum number of Worker Threads
};
/*! @}
* @}
*/
} // namespace cpu
#endif // CPUDEVICE_HPP_