6d464be252
ECR #304775 - Make optimization for read map of USWC memory - If runtime detects USWC map with read operation, then it will switch to indirect map. This should improve map-read performance on APU(s) when USWC memory is used instead of frame buffer Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#72 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#269 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#89 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#172 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#234 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#486 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#134 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#112 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#340 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.cpp#88 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.hpp#45 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.cpp#98 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#26 edit
1156 строки
34 KiB
C++
1156 строки
34 KiB
C++
//
|
|
// Copyright 2011 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "device/cpu/cpudevice.hpp"
|
|
#include "device/cpu/cpuprogram.hpp"
|
|
#include "utils/versions.hpp"
|
|
|
|
#include "amdocl/cl_common.hpp"
|
|
|
|
#include <string>
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#if defined(__linux__)
|
|
#if !defined(ATI_ARCH_ARM)
|
|
#include <sys/sysinfo.h>
|
|
#endif // ATI_ARCH_ARM
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#if defined(_WIN32)
|
|
# include <windows.h>
|
|
# include <intrin.h>
|
|
|
|
extern BOOL (WINAPI *pfnGetNumaNodeProcessorMaskEx)(USHORT,PGROUP_AFFINITY);
|
|
#endif // _WIN32
|
|
|
|
namespace cpu {
|
|
|
|
aclCompiler* Device::compiler_;
|
|
|
|
size_t Device::maxWorkerThreads_ = (size_t)-1;
|
|
|
|
// Releases the affinity bookkeeping owned by this device. With NUMA support
// on Linux a device that reports a NUMA mask frees that mask; every other
// device frees its worker-thread affinity mask.
Device::~Device()
{
#if defined(__linux__) && defined(NUMA_SUPPORT)
    if (getNumaMask() != NULL) {
        // deleting a null pointer is a no-op, so no extra check is needed
        delete numaMask_;
        return;
    }
#endif
    delete workerThreadsAffinity_;
}
|
|
void
|
|
Device::tearDown()
|
|
{
|
|
aclCompilerFini(compiler_);
|
|
}
|
|
bool
|
|
Device::init()
|
|
{
|
|
// Allow disabling of the CPU device
|
|
if (CPU_MAX_COMPUTE_UNITS == 0)
|
|
return false;
|
|
|
|
const char *library = getenv("COMPILER_LIBRARY");
|
|
aclCompilerOptions opts = {
|
|
sizeof(aclCompilerOptions_0_8),
|
|
library,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
&::malloc,
|
|
&::free
|
|
};
|
|
compiler_ = aclCompilerInit(NULL, NULL);
|
|
|
|
device::Info info;
|
|
::memset(&info, '\0', sizeof(info));
|
|
|
|
info.type_ = CL_DEVICE_TYPE_CPU;
|
|
info.vendorId_ = 0x1002;
|
|
|
|
int systemProcessorCount = amd::Os::processorCount();
|
|
info.maxComputeUnits_ = systemProcessorCount;
|
|
if (!flagIsDefault(CPU_MAX_COMPUTE_UNITS)) {
|
|
if ((CPU_MAX_COMPUTE_UNITS <= 0) || (CPU_MAX_COMPUTE_UNITS > systemProcessorCount))
|
|
info.maxComputeUnits_ = systemProcessorCount;
|
|
else
|
|
info.maxComputeUnits_ = CPU_MAX_COMPUTE_UNITS;
|
|
}
|
|
|
|
info.maxWorkItemDimensions_ = 3;
|
|
info.maxWorkGroupSize_ = CPU_MAX_WORKGROUP_SIZE;
|
|
info.maxWorkItemSizes_[0] = info.maxWorkGroupSize_;
|
|
info.maxWorkItemSizes_[1] = info.maxWorkGroupSize_;
|
|
info.maxWorkItemSizes_[2] = info.maxWorkGroupSize_;
|
|
|
|
info.addressBits_ = LP64_SWITCH(32,64);
|
|
|
|
|
|
if (CPU_IMAGE_SUPPORT) {
|
|
info.imageSupport_ = CL_TRUE;
|
|
info.maxReadImageArgs_ = MaxReadImage;
|
|
info.maxWriteImageArgs_ = MaxWriteImage;
|
|
info.image2DMaxWidth_ = 8 * Ki;
|
|
info.image2DMaxHeight_ = 8 * Ki;
|
|
info.image3DMaxWidth_ = 2 * Ki;
|
|
info.image3DMaxHeight_ = 2 * Ki;
|
|
info.image3DMaxDepth_ = 2 * Ki;
|
|
info.maxSamplers_ = MaxSamplers;
|
|
|
|
// OpenCL 1.2 device info fields
|
|
info.imageMaxBufferSize_ = 64 * Ki;
|
|
info.imageMaxArraySize_ = 2 * Ki;
|
|
|
|
info.imagePitchAlignment_ = 0;
|
|
info.imageBaseAddressAlignment_ = 0;
|
|
info.bufferFromImageSupport_ = CL_FALSE;
|
|
}
|
|
|
|
info.maxParameterSize_ = 4*Ki;
|
|
|
|
info.memBaseAddrAlign_ = 8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ?
|
|
sizeof(cl_long16) : MEMOBJ_BASE_ADDR_ALIGN);
|
|
info.minDataTypeAlignSize_ = sizeof(cl_long16);
|
|
|
|
info.singleFPConfig_ =
|
|
CL_FP_DENORM | CL_FP_INF_NAN |
|
|
CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
|
|
CL_FP_ROUND_TO_INF | CL_FP_FMA;
|
|
|
|
info.doubleFPConfig_ = info.singleFPConfig_;
|
|
info.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
|
|
|
|
info.affinityDomain_.value_ = 0;
|
|
info.affinityDomain_.next_ = 1;
|
|
|
|
info.globalMemCacheType_ = CL_READ_WRITE_CACHE;
|
|
|
|
#if defined(__linux__)
|
|
|
|
info.globalMemCacheLineSize_ = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
|
|
info.globalMemCacheSize_ = sysconf(_SC_LEVEL1_DCACHE_SIZE);
|
|
info.affinityDomain_.cacheL1_ = 1;
|
|
|
|
if (sysconf(_SC_LEVEL2_CACHE_SIZE) > 0) {
|
|
info.affinityDomain_.cacheL2_ = 1;
|
|
}
|
|
if (sysconf(_SC_LEVEL3_CACHE_SIZE) > 0) {
|
|
info.affinityDomain_.cacheL3_ = 1;
|
|
}
|
|
if (sysconf(_SC_LEVEL4_CACHE_SIZE) > 0) {
|
|
info.affinityDomain_.cacheL4_ = 1;
|
|
}
|
|
|
|
#if defined(NUMA_SUPPORT)
|
|
if (numa_available() != -1 && numa_max_node() => 0) {
|
|
info.affinityDomain_.numa_ = 1;
|
|
}
|
|
#endif
|
|
|
|
#else // win32
|
|
|
|
DWORD length = 0;
|
|
::GetLogicalProcessorInformation(NULL, &length);
|
|
|
|
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer =
|
|
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc(length);
|
|
|
|
if (buffer != NULL && ::GetLogicalProcessorInformation(buffer, &length)) {
|
|
bool found = false;
|
|
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr, limit =
|
|
&buffer[length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)];
|
|
for (ptr = buffer; ptr < limit; ++ptr) {
|
|
PCACHE_DESCRIPTOR cache = &ptr->Cache;
|
|
if (ptr->Relationship == RelationCache && cache->Type != CacheInstruction) {
|
|
info.affinityDomain_.value_ |=
|
|
(device::AffinityDomain::AFFINITY_DOMAIN_L1_CACHE << 1) >>
|
|
cache->Level;
|
|
|
|
if (!found && cache->Level == 1) {
|
|
info.globalMemCacheLineSize_ = cache->LineSize;
|
|
info.globalMemCacheSize_ = cache->Size;
|
|
found = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
free(buffer);
|
|
|
|
ULONG highestNuma = 0;
|
|
if (::GetNumaHighestNodeNumber(&highestNuma) && highestNuma != 0) {
|
|
info.affinityDomain_.numa_ = 1;
|
|
}
|
|
|
|
#endif
|
|
|
|
uintptr_t virtualMemSize;
|
|
|
|
#if defined(__linux__)
|
|
#if !defined(ATI_ARCH_ARM)
|
|
struct sysinfo si;
|
|
|
|
if (sysinfo(&si) != 0) {
|
|
return false;
|
|
}
|
|
if (si.mem_unit == 0) {
|
|
// Linux kernels prior to 2.3.23 return sizes in bytes.
|
|
si.mem_unit = 1;
|
|
}
|
|
info.globalMemSize_ = (cl_ulong) si.totalram * si.mem_unit;
|
|
#else
|
|
info.globalMemSize_ = 0;
|
|
#endif
|
|
virtualMemSize = (uintptr_t) info.globalMemSize_;
|
|
#else
|
|
MEMORYSTATUSEX statex;
|
|
statex.dwLength = sizeof (statex);
|
|
|
|
if (GlobalMemoryStatusEx (&statex) == 0) {
|
|
return false;
|
|
}
|
|
info.globalMemSize_ = (cl_ulong) statex.ullTotalPhys;
|
|
virtualMemSize =
|
|
(uintptr_t) std::min(statex.ullTotalPageFile, statex.ullTotalVirtual);
|
|
#endif
|
|
|
|
maxWorkerThreads_ = (size_t) (virtualMemSize /
|
|
(uintptr_t) ((CPU_WORKER_THREAD_STACK_SIZE +
|
|
CLK_PRIVATE_MEMORY_SIZE * (CPU_MAX_WORKGROUP_SIZE + 1))) *
|
|
7 / 10);
|
|
|
|
#if defined(_LP64)
|
|
// Cap at 8TiB for 64-bit
|
|
const cl_ulong maxGlobalMemSize = 8ULL*Ki*Gi;
|
|
#elif defined(_WIN32)
|
|
// Cap at 2GiB (see http://msdn.microsoft.com/en-us/library/aa366778.aspx)
|
|
const cl_ulong maxGlobalMemSize = 2ULL*Gi;
|
|
#else // linux
|
|
// Cap at 3.5GiB
|
|
const cl_ulong maxGlobalMemSize = 3584ULL*Mi;
|
|
#endif
|
|
info.globalMemSize_ = std::min(info.globalMemSize_, maxGlobalMemSize);
|
|
|
|
info.maxMemAllocSize_ = info.globalMemSize_ * CPU_MAX_ALLOC_PERCENT / 100;
|
|
if (flagIsDefault(CPU_MAX_ALLOC_PERCENT)) {
|
|
const cl_ulong minAllocSize = LP64_SWITCH(1ULL*Gi, 2ULL*Gi);
|
|
info.maxMemAllocSize_ = std::max(info.maxMemAllocSize_,
|
|
std::min(info.globalMemSize_, minAllocSize));
|
|
}
|
|
|
|
info.maxConstantBufferSize_ = 64*Ki;
|
|
info.maxConstantArgs_ = 8;
|
|
|
|
info.localMemType_ = CL_GLOBAL;
|
|
info.localMemSize_ = std::max((cl_ulong)32*Ki, info.globalMemCacheSize_/2);
|
|
|
|
info.errorCorrectionSupport_ = CL_FALSE;
|
|
info.hostUnifiedMemory_ = CL_TRUE;
|
|
info.profilingTimerResolution_ = (size_t)amd::Os::timerResolutionNanos();
|
|
info.profilingTimerOffset_ = amd::Os::offsetToEpochNanos();
|
|
info.littleEndian_ = CL_TRUE;
|
|
info.available_ = CL_TRUE;
|
|
info.compilerAvailable_ = CL_TRUE;
|
|
info.linkerAvailable_ = CL_TRUE;
|
|
|
|
info.executionCapabilities_ = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL;
|
|
// Enable SVM only for OpenCL 2.0
|
|
if (((OPENCL_MAJOR >= 2) && (CPU_OPENCL_VERSION >= 200)) || OCL_FORCE_CPU_SVM) {
|
|
info.svmCapabilities_ = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
|
|
CL_DEVICE_SVM_FINE_GRAIN_BUFFER |
|
|
CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
|
|
CL_DEVICE_SVM_ATOMICS;
|
|
}
|
|
info.preferredPlatformAtomicAlignment_ = 0;
|
|
info.preferredGlobalAtomicAlignment_ = 0;
|
|
info.preferredLocalAtomicAlignment_ = 0;
|
|
info.queueProperties_ = CL_QUEUE_PROFILING_ENABLE;
|
|
|
|
info.platform_ = AMD_PLATFORM;
|
|
|
|
#if defined(__linux__)
|
|
|
|
std::ifstream ifs("/proc/cpuinfo", std::ios::in);
|
|
if (ifs.is_open()) {
|
|
std::string line;
|
|
bool vendor = false;
|
|
bool name = false;
|
|
bool freq = false;
|
|
|
|
while (std::getline(ifs, line) && !(vendor && name && freq)) {
|
|
if (!vendor && (line.find("vendor_id\t: ")
|
|
!= std::string::npos)) {
|
|
::strcpy(
|
|
info.vendor_,
|
|
line.substr(line.find_first_of(':') + 2).c_str());
|
|
vendor = true;
|
|
}
|
|
else if (!name && (line.find("model name\t: ") != std::string::npos
|
|
|| line.find("Processor\t: ") != std::string::npos)) {
|
|
::strcpy(
|
|
info.name_,
|
|
line.substr(line.find_first_of(':') + 2).c_str());
|
|
name = true;
|
|
}
|
|
else if (!freq && (line.find("cpu MHz\t\t: ")
|
|
!= std::string::npos)) {
|
|
info.maxClockFrequency_ =
|
|
::atoi(line.substr(line.find_first_of(':') + 2).c_str());
|
|
freq = true;
|
|
}
|
|
}
|
|
ifs.close();
|
|
}
|
|
|
|
#elif defined(_WIN32)
|
|
|
|
int CPUInfo[4] = {-1};
|
|
int nRet = 0;
|
|
unsigned nIds, nExIds, i;
|
|
|
|
// cpuid with an InfoType argument of 0 returns the number of
|
|
// valid Ids in CPUInfo[0] and the CPU identification string in
|
|
// the other three array elements. The CPU identification string is
|
|
// not in linear order. The code below arranges the information
|
|
// in a human readable form.
|
|
amd::Os::cpuid(CPUInfo, 0);
|
|
nIds = CPUInfo[0];
|
|
memset(info.vendor_, 0, sizeof(info.vendor_));
|
|
*((int*)(info.vendor_+0)) = CPUInfo[1];
|
|
*((int*)(info.vendor_+4)) = CPUInfo[3];
|
|
*((int*)(info.vendor_+8)) = CPUInfo[2];
|
|
|
|
// Calling cpuid with 0x80000000 as the InfoType argument
|
|
// gets the number of valid extended IDs.
|
|
amd::Os::cpuid(CPUInfo, 0x80000000);
|
|
nExIds = CPUInfo[0];
|
|
memset(info.name_, 0, sizeof(info.name_));
|
|
sprintf(info.name_, "Unknown Processor");
|
|
|
|
// Get the information associated with each extended ID.
|
|
for (i=0x80000000; i<=nExIds; ++i)
|
|
{
|
|
amd::Os::cpuid(CPUInfo, i);
|
|
// Interpret CPU brand string and cache information.
|
|
if (i == 0x80000002)
|
|
memcpy(info.name_, CPUInfo, sizeof(CPUInfo));
|
|
else if (i == 0x80000003)
|
|
memcpy(info.name_ + 16, CPUInfo, sizeof(CPUInfo));
|
|
else if (i == 0x80000004)
|
|
memcpy(info.name_ + 32, CPUInfo, sizeof(CPUInfo));
|
|
}
|
|
|
|
|
|
info.maxClockFrequency_ = 0;
|
|
HKEY hKey;
|
|
|
|
// Open the key
|
|
if (RegOpenKeyEx(
|
|
HKEY_LOCAL_MACHINE,
|
|
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0\\",
|
|
0, KEY_QUERY_VALUE, &hKey) == ERROR_SUCCESS) {
|
|
|
|
// Read the value
|
|
DWORD dwLen = 4;
|
|
RegQueryValueEx(
|
|
hKey, "~MHz", NULL, NULL,
|
|
(LPBYTE)&info.maxClockFrequency_, &dwLen);
|
|
|
|
// Cleanup and return
|
|
RegCloseKey(hKey);
|
|
}
|
|
|
|
#else
|
|
::strcpy(info.name_, "Unknown Processor");
|
|
::strcpy(info.vendor_, "Unknown Vendor");
|
|
info.maxClockFrequency_ = 0;
|
|
#endif
|
|
|
|
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
|
|
|
|
info.profile_ = "FULL_PROFILE";
|
|
if (CPU_OPENCL_VERSION < 200) {
|
|
info.version_ = "OpenCL 1.2 " AMD_PLATFORM_INFO;
|
|
info.oclcVersion_ = "OpenCL C 1.2 ";
|
|
}
|
|
else {
|
|
info.version_ = "OpenCL " OPENCL_VERSION_STR " " AMD_PLATFORM_INFO;
|
|
info.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
|
|
}
|
|
info.spirVersions_ = "1.2";
|
|
|
|
#if cl_amd_open_video
|
|
info.openVideo_ = CL_FALSE;
|
|
#endif // cl_amd_open_video
|
|
|
|
info.partitionCreateInfo_.type_.value_ = 0;
|
|
info.partitionProperties_.value_ = 0;
|
|
if (info.maxComputeUnits_ > 1) {
|
|
info.partitionProperties_.equally_ = 1;
|
|
info.partitionProperties_.byCounts_ = 1;
|
|
if (info.affinityDomain_.value_ != 0) {
|
|
info.partitionProperties_.byAffinityDomain_ = 1;
|
|
}
|
|
}
|
|
else {
|
|
info.affinityDomain_.value_ = 0;
|
|
}
|
|
|
|
// Copy the name into the boardName data member for CPU implementation.
|
|
// ::strncpy(info.boardName_, info.name_, sizeof(info.boardName_));
|
|
memset(info.boardName_, 0, sizeof(info.boardName_));
|
|
|
|
Device* device = new Device();
|
|
|
|
if (device == NULL || !device->create()) {
|
|
delete device;
|
|
return false;
|
|
}
|
|
|
|
::snprintf(info.driverVersion_, sizeof(info.driverVersion_) - 1,
|
|
"%s (%s%s%s)", AMD_BUILD_STRING,
|
|
#if defined(ATI_ARCH_X86)
|
|
"sse2",
|
|
#else // !ATI_ARCH_X86
|
|
"",
|
|
#endif // !ATI_ARCH_X86
|
|
device->hasAVXInstructions() ? ",avx" : "",
|
|
device->hasFMA4Instructions() ? ",fma4" : "");
|
|
|
|
// These will need to change for AVX2
|
|
info.preferredVectorWidthChar_ = 16;
|
|
info.preferredVectorWidthShort_ = 8;
|
|
info.preferredVectorWidthInt_ = 4;
|
|
info.preferredVectorWidthLong_ = 2;
|
|
if (device->hasAVXInstructions()) {
|
|
info.preferredVectorWidthFloat_ = 8;
|
|
info.preferredVectorWidthDouble_ = 4;
|
|
} else {
|
|
info.preferredVectorWidthFloat_ = 4;
|
|
info.preferredVectorWidthDouble_ = 2;
|
|
}
|
|
info.preferredVectorWidthHalf_ = 0; // no half support
|
|
|
|
// Same here, will need to change for AVX2
|
|
info.nativeVectorWidthChar_ = 16;
|
|
info.nativeVectorWidthShort_ = 8;
|
|
info.nativeVectorWidthInt_ = 4;
|
|
info.nativeVectorWidthLong_ = 2;
|
|
if (device->hasAVXInstructions()) {
|
|
info.nativeVectorWidthFloat_ = 8;
|
|
info.nativeVectorWidthDouble_ = 4;
|
|
} else {
|
|
info.nativeVectorWidthFloat_ = 4;
|
|
info.nativeVectorWidthDouble_ = 2;
|
|
}
|
|
info.nativeVectorWidthHalf_ = 0; // no half support
|
|
|
|
// Find all supported device extensions
|
|
info.extensions_ = device->getExtensionString();
|
|
|
|
// OpenCL 1.2 device info fields
|
|
info.builtInKernels_ = "";
|
|
info.preferredInteropUserSync_ = true;
|
|
info.printfBufferSize_ = 64*Ki;
|
|
|
|
info.maxPipePacketSize_ = info.maxMemAllocSize_;
|
|
info.maxPipeActiveReservations_ = 16;
|
|
info.maxPipeArgs_ = 16;
|
|
info.maxReadWriteImageArgs_ = MaxReadWriteImage;
|
|
|
|
// Max size should not be bigger than 1.75 GB
|
|
const cl_ulong maxSize = std::min(static_cast<cl_ulong>((Gi/4)*7),
|
|
info.maxMemAllocSize_);
|
|
info.maxGlobalVariableSize_ = static_cast<size_t>(maxSize);
|
|
info.globalVariablePreferredTotalSize_ = static_cast<size_t>(maxSize);
|
|
|
|
device->info_ = info;
|
|
device->registerDevice();
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Device::create()
|
|
{
|
|
// Create CPU settings
|
|
settings_ = new cpu::Settings();
|
|
cpu::Settings* cpuSettings = reinterpret_cast<cpu::Settings*>(settings_);
|
|
|
|
if ((cpuSettings == NULL) || !cpuSettings->create()) {
|
|
return false;
|
|
}
|
|
|
|
#if defined(ATI_ARCH_X86)
|
|
// Check that we have at least SSE2
|
|
if (settings().cpuFeatures_ == 0) {
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Device::initSubDevice(
|
|
device::Info& info,
|
|
cl_uint maxComputeUnits,
|
|
const device::CreateSubDevicesInfo& create_info)
|
|
{
|
|
if (workerThreadsAffinity_ == NULL) {
|
|
workerThreadsAffinity_ = new amd::Os::ThreadAffinityMask;
|
|
if (workerThreadsAffinity_ == NULL) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
info_ = info;
|
|
info_.maxComputeUnits_ = maxComputeUnits;
|
|
info_.partitionCreateInfo_ = create_info.p_;
|
|
if (create_info.p_.type_.value_ == device::PartitionType::BY_COUNTS) {
|
|
cl_uint* countsList = new cl_uint[create_info.p_.byCounts_.listSize_];
|
|
if (countsList == NULL) {
|
|
return false;
|
|
}
|
|
for (size_t i = 0; i < create_info.p_.byCounts_.listSize_; ++i) {
|
|
countsList[i] = create_info.countsListAt(i);
|
|
}
|
|
info_.partitionCreateInfo_.byCounts_.countsList_ = countsList;
|
|
}
|
|
|
|
// The device cannot be partitioned further
|
|
if (maxComputeUnits == 1) {
|
|
info_.partitionProperties_.value_ = 0;
|
|
info_.affinityDomain_.value_ = 0;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Device::setWorkerThreadsAffinity(
|
|
cl_uint numWorkerThreads,
|
|
const amd::Os::ThreadAffinityMask* threadsAffinityMask,
|
|
uint& baseCoreId)
|
|
{
|
|
uint coreId = baseCoreId;
|
|
if (threadsAffinityMask == NULL) {
|
|
for (cl_uint i = 0; i < numWorkerThreads; ++i) {
|
|
++coreId;
|
|
workerThreadsAffinity_->set(coreId);
|
|
}
|
|
}
|
|
else { // Already has affinity, so filter accordingly
|
|
for (cl_uint i = 0; i < numWorkerThreads; ++i) {
|
|
coreId = threadsAffinityMask->getNextSet(coreId);
|
|
workerThreadsAffinity_->set(coreId);
|
|
}
|
|
}
|
|
baseCoreId = coreId;
|
|
}
|
|
|
|
// Dispatches a sub-device creation request to the partitioning routine that
// matches the requested partition type.
// Returns the result of that routine, CL_DEVICE_PARTITION_FAILED when an
// affinity-domain partition is requested but this device has no affinity
// domains, or CL_INVALID_VALUE for an unknown type or unsupported domain.
cl_int
Device::createSubDevices(
    device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    switch (create_info.p_.type_.value_) {
    case device::PartitionType::EQUALLY:
        return partitionEqually(
            create_info, num_entries, devices, num_devices);

    case device::PartitionType::BY_COUNTS:
        return partitionByCounts(
            create_info, num_entries, devices, num_devices);

    case device::PartitionType::BY_AFFINITY_DOMAIN:
        break;  // handled below

    default:
        return CL_INVALID_VALUE;
    }

    // --- partition by affinity domain ---
    const bool hasDomains = (info_.affinityDomain_.value_ != 0);
    if (!hasDomains) {
        return CL_DEVICE_PARTITION_FAILED;
    }

    if (create_info.p_.byAffinityDomain_.next_) {
        // "Next partitionable": replace the request with the domain given
        // by the lowest bit set in this device's affinity domains.
        create_info.p_.byAffinityDomain_.next_ = 0;
        create_info.p_.byAffinityDomain_.value_ =
            (1 << amd::leastBitSet(info_.affinityDomain_.value_));
    }
    else {
        const bool supported = (create_info.p_.byAffinityDomain_.value_ &
            info_.affinityDomain_.value_) != 0;
        if (!supported) {
            return CL_INVALID_VALUE;
        }
    }

    return create_info.p_.byAffinityDomain_.numa_
        ? partitionByAffinityDomainNUMA(
            create_info, num_entries, devices, num_devices)
        : partitionByAffinityDomainCacheLevel(
            create_info, num_entries, devices, num_devices);
}
|
|
|
|
// Splits this device into as many sub-devices as fit, each with the number
// of compute units requested in create_info.
//   num_entries - capacity of 'devices'
//   devices     - optional output array receiving the new sub-devices
//   num_devices - optional output receiving the number of sub-devices
// Returns CL_SUCCESS, CL_INVALID_VALUE (zero units requested or 'devices'
// too small), CL_DEVICE_PARTITION_FAILED (request larger than this device)
// or CL_OUT_OF_HOST_MEMORY (sub-device creation failed).
cl_int
Device::partitionEqually(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    const cl_uint unitsPerDevice =
        (cl_uint)create_info.p_.equally_.numComputeUnits_;
    if (0 == unitsPerDevice) {
        return CL_INVALID_VALUE;
    }

    const cl_uint deviceCount = info_.maxComputeUnits_ / unitsPerDevice;
    if (0 == deviceCount) {
        return CL_DEVICE_PARTITION_FAILED;
    }

    if (num_devices != NULL) {
        *num_devices = deviceCount;
    }

    // Count-only query: nothing to create
    if (NULL == devices) {
        return CL_SUCCESS;
    }
    if (num_entries < deviceCount) {
        return CL_INVALID_VALUE;
    }

    // (uint)-1 means no core has been handed out yet
    uint lastCore = (uint)-1;
    for (cl_uint created = 0; created < deviceCount; ++created) {
        Device* sub = new Device(this);
        if (NULL == sub) {
            return CL_OUT_OF_HOST_MEMORY;
        }

        const bool ready = sub->create() &&
            sub->initSubDevice(info_, unitsPerDevice, create_info);
        if (!ready) {
            sub->release();
            return CL_OUT_OF_HOST_MEMORY;
        }

        sub->setWorkerThreadsAffinity(
            unitsPerDevice, workerThreadsAffinity_, lastCore);
        *devices = as_cl(static_cast<amd::Device*>(sub));
        ++devices;
    }

    return CL_SUCCESS;
}
|
|
|
|
// Splits this device into one sub-device per entry of the counts list in
// create_info, each sub-device receiving the listed number of compute units.
//   num_entries - capacity of 'devices'
//   devices     - optional output array receiving the new sub-devices
//   num_devices - optional output receiving the number of sub-devices
// Returns CL_SUCCESS, CL_INVALID_DEVICE_PARTITION_COUNT (empty list or more
// units requested than this device has), CL_INVALID_VALUE ('devices' too
// small) or CL_OUT_OF_HOST_MEMORY (sub-device creation failed).
cl_int
Device::partitionByCounts(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    cl_uint maxComputeUnits = 0;
    cl_uint numSubDevices = (cl_uint)create_info.p_.byCounts_.listSize_;
    // Sum every entry of the list; valid indices are [0, listSize_)
    for (size_t i = 0; i < (size_t)numSubDevices; ++i) {
        maxComputeUnits += create_info.countsListAt(i);
    }
    if (numSubDevices == 0 || maxComputeUnits > info_.maxComputeUnits_) {
        return CL_INVALID_DEVICE_PARTITION_COUNT;
    }

    if (num_devices != NULL) {
        *num_devices = numSubDevices;
    }

    if (devices != NULL) {
        if (num_entries < numSubDevices) {
            return CL_INVALID_VALUE;
        }
        // (uint)-1 means no core has been handed out yet
        uint coreId = (uint)-1;
        // Sub-devices are created from the last list entry to the first
        while (numSubDevices-- > 0) {
            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            cl_uint subComputeUnits =
                create_info.countsListAt((size_t)numSubDevices);
            if (!device->create() ||
                !device->initSubDevice(info_, subComputeUnits, create_info)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            device->setWorkerThreadsAffinity(
                subComputeUnits, workerThreadsAffinity_, coreId);
            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
    }

    return CL_SUCCESS;
}
|
|
|
|
// Creates one sub-device per NUMA node.
//   num_entries - capacity of 'devices'
//   devices     - optional output array receiving the new sub-devices
//   num_devices - optional output receiving the number of sub-devices
// Returns CL_SUCCESS, CL_INVALID_VALUE (NUMA unavailable, no usable node,
// or 'devices' too small) or CL_OUT_OF_HOST_MEMORY (creation failed).
cl_int
Device::partitionByAffinityDomainNUMA(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    cl_uint numSubDevices = 0;

#if defined(__linux__)
#if !defined(NUMA_SUPPORT)
    return CL_INVALID_VALUE;
#else
    int highestNuma = numa_max_node();
    if (highestNuma < 0) {
        return CL_INVALID_VALUE;
    }

    // Nodes are numbered 0..highestNuma inclusive, matching the loop below
    numSubDevices = (cl_uint)highestNuma + 1;
    if (devices != NULL) {
        // Never write past the end of the caller's array
        if (num_entries < numSubDevices) {
            return CL_INVALID_VALUE;
        }
        for (int node = 0; node <= highestNuma; ++node) {
            cl_uint subComputeUnits = 0;
            int len = 1;
            // Grow the cpu bitmap until numa_node_to_cpus() accepts it,
            // then count the cpus that belong to this node.
            while (true) {
                ulong* cpus = (ulong*)alloca(sizeof(ulong)*len);
                if (numa_node_to_cpus(node, cpus, len * sizeof(ulong)) < 0) {
                    if (errno != ERANGE) {
                        return CL_INVALID_VALUE;
                    }
                    len *= 2;
                }
                else {
                    // From here on 'len' is the bitmap size in bits
                    len *= sizeof(ulong) * 8;
                    for (int i = 0; i < len; i++) {
                        if (test_bit(i, cpus)) {
                            ++subComputeUnits;
                        }
                    }
                    break;
                }
            }

            if (subComputeUnits == 0) {
                return CL_INVALID_VALUE;
            }

            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->create() || NULL == (device->numaMask_ = new nodemask_t)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->initSubDevice(
                    info_, subComputeUnits, create_info)) {
                delete device->numaMask_;
                device->numaMask_ = NULL;
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            nodemask_zero(device->numaMask_);
            nodemask_set(device->numaMask_, node);
            // Need to remove this domain type
            device->info_.affinityDomain_.numa_ = 0;
            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
    }
#endif // NUMA_SUPPORT

#else // win32
    GROUP_AFFINITY numaNodeMask;
    ULONG highestNuma = 0;
    if (!::GetNumaHighestNodeNumber(&highestNuma)) {
        return CL_INVALID_VALUE;
    }

    for (ULONG node = 0; node <= highestNuma; ++node) {
        if (pfnGetNumaNodeProcessorMaskEx != NULL) {
            if (!pfnGetNumaNodeProcessorMaskEx((USHORT)node, &numaNodeMask)) {
                // Highest NUMA node number is not guaranteed to be the
                // number of nodes.
                continue;
            }
        }
        else {
            ULONGLONG tmpMask;
            if (!::GetNumaNodeProcessorMask((UCHAR)node, &tmpMask)) {
                // Highest NUMA node number is not guaranteed to be the
                // number of nodes.
                continue;
            }
            numaNodeMask.Group = 0;
            numaNodeMask.Mask = (KAFFINITY)tmpMask;
        }

        // Restrict the node mask by this device's own affinity, if any;
        // a node left with no processors is skipped.
        if (workerThreadsAffinity_ != NULL) {
            workerThreadsAffinity_->adjust(0, numaNodeMask.Mask);
        }
        if (numaNodeMask.Mask == 0) {
            continue;
        }

        if (devices != NULL) {
            // Never write past the end of the caller's array
            if (numSubDevices >= num_entries) {
                return CL_INVALID_VALUE;
            }

            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->create() || !device->initSubDevice(info_,
                    (cl_uint)amd::countBitsSet(numaNodeMask.Mask), create_info)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            device->workerThreadsAffinity_->set(
                numaNodeMask.Group, numaNodeMask.Mask);
            // Need to remove this domain type
            device->info_.affinityDomain_.numa_ = 0;
            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
        numSubDevices++;
    }

#endif // win32

    if (num_devices != NULL) {
        *num_devices = numSubDevices;
    }

    // Could not get a processor mask for any of the nodes
    if (numSubDevices == 0) {
        return CL_INVALID_VALUE;
    }
    return CL_SUCCESS;
}
|
|
|
|
#if defined(__linux__)
|
|
// Reads the beginning of 'file' into 'buf' as a NUL-terminated string.
//   file    - path of the file to read (may alias 'buf': the path is only
//             used to open the file, before 'buf' is written)
//   buf     - destination buffer
//   bufSize - capacity of 'buf' in bytes, including the terminator
// At most bufSize - 1 bytes are read with a single read() call, so content
// shorter than the buffer is returned in full and longer content is
// truncated. Returns false if the file cannot be opened, nothing can be
// read, or there is no room for any data.
static bool
readFileString(const char* file, char* buf, size_t bufSize)
{
    if (bufSize == 0) {
        return false;
    }

    int fd = open(file, O_RDONLY);
    if (fd < 0) {
        return false;
    }

    // Reserve the last byte for the terminator instead of overwriting the
    // final byte of the data that was read.
    ssize_t n = read(fd, buf, bufSize - 1);
    close(fd);

    if (n <= 0) {
        return false;
    }

    buf[n] = '\0';
    return true;
}
|
|
|
|
// Converts a cpu map string - comma separated groups of hexadecimal digits,
// most significant group first - into a cpu_set_t.
//   cpuMap - NUL-terminated map text
//   mask   - receives the parsed set; it is cleared first
// Groups are consumed from the right-most (least significant) one and stored
// as consecutive 32-bit words of the set. Parsing stops once the set is
// full, so an overlong map cannot write past the end of 'mask'.
static void
parseSharedCpuMap(const char* cpuMap, cpu_set_t& mask)
{
    CPU_ZERO(&mask);
    uint32_t* bits = (uint32_t*)mask.__bits;
    uint32_t* const bitsEnd = bits + sizeof(mask) / sizeof(uint32_t);
    const char* s = cpuMap + strlen(cpuMap);
    while (bits < bitsEnd) {
        // Find the start of the group that ends just before 's'
        s = (const char*)memrchr(cpuMap, ',', s - cpuMap);
        if (!s) {
            s = cpuMap;
        }
        else {
            s++;
        }

        // strtoul stops at the next ',' or any other non-hex character
        *bits++ = (uint32_t)strtoul(s, NULL, 16);

        if (s == cpuMap) {
            return;
        }

        // Step back onto the separator so the next search excludes it
        --s;
    }
}
|
|
#endif // linux
|
|
|
|
// Creates one sub-device per group of processors sharing a cache of the
// level selected by create_info (L1..L4).
//   num_entries - capacity of 'devices'
//   devices     - optional output array receiving the new sub-devices
//   num_devices - optional output receiving the number of sub-devices
// Returns CL_SUCCESS, CL_INVALID_VALUE (domain is not a single cache level,
// cache topology unreadable, no sub-device found, or 'devices' too small)
// or CL_OUT_OF_HOST_MEMORY (sub-device creation failed).
cl_int
Device::partitionByAffinityDomainCacheLevel(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    cl_uint cacheLevel = 0;
    switch (create_info.p_.byAffinityDomain_.value_) {
    case device::AffinityDomain::AFFINITY_DOMAIN_L4_CACHE:
        cacheLevel = 4;
        break;
    case device::AffinityDomain::AFFINITY_DOMAIN_L3_CACHE:
        cacheLevel = 3;
        break;
    case device::AffinityDomain::AFFINITY_DOMAIN_L2_CACHE:
        cacheLevel = 2;
        break;
    case device::AffinityDomain::AFFINITY_DOMAIN_L1_CACHE:
        cacheLevel = 1;
        break;
    default:
        return CL_INVALID_VALUE;
    }

    // Mask used to clear the consumed domain from each new sub-device
    const uint negAffinityDomain =
        ~create_info.p_.byAffinityDomain_.value_;
    cl_uint numSubDevices = 0;

#if defined(__linux__)

    // Processors still to be assigned: this device's own affinity if it has
    // one, otherwise cpu ids 0..maxComputeUnits-1.
    amd::Os::ThreadAffinityMask affinityMask;
    if (workerThreadsAffinity_ != NULL) {
        affinityMask = *workerThreadsAffinity_;
    }
    else {
        for (uint cpuId = 0; cpuId < (uint)info_.maxComputeUnits_; ++cpuId) {
            affinityMask.set(cpuId);
        }
    }

    amd::Os::ThreadAffinityMask currentMask;
    char buf[1024];
    for (uint cpuId = affinityMask.getFirstSet();
         cpuId != (uint)-1;
         cpuId = affinityMask.getNextSet(cpuId)) {

        snprintf(buf, sizeof(buf),
            "/sys/devices/system/cpu/cpu%u/cache/index%u/shared_cpu_map",
            cpuId, cacheLevel);

        // 'buf' holds the path on entry and the file content on return
        if (!readFileString(buf, buf, sizeof(buf))) {
            return CL_INVALID_VALUE;
        }

        parseSharedCpuMap(buf, currentMask.getNative());
        affinityMask.adjust(currentMask.getNative());
        if (currentMask.isEmpty()) {
            continue;
        }

        cl_uint maxComputeUnits;
        if (cacheLevel > 1) {
            // Derive the compute-unit count from the next lower cache level
            maxComputeUnits = 0;
            amd::Os::ThreadAffinityMask currentMaskSub;
            cl_uint cacheLevelSub = cacheLevel - 1;
            for (uint cpuIdSub = affinityMask.getFirstSet();
                 cpuIdSub != (uint)-1;
                 cpuIdSub = affinityMask.getNextSet(cpuIdSub)) {

                snprintf(buf, sizeof(buf),
                    "/sys/devices/system/cpu/cpu%u/cache/index%u/shared_cpu_map",
                    cpuIdSub, cacheLevelSub);

                if (!readFileString(buf, buf, sizeof(buf))) {
                    return CL_INVALID_VALUE;
                }

                parseSharedCpuMap(buf, currentMaskSub.getNative());
                currentMask.adjust(currentMaskSub.getNative());
                if (!currentMaskSub.isEmpty()) {
                    ++maxComputeUnits;
                }
            }

            if (maxComputeUnits == 0) {
                continue;
            }
        }
        else {
            maxComputeUnits = 1;
        }

        if (devices != NULL) {
            // Never write past the end of the caller's array
            if (numSubDevices >= num_entries) {
                return CL_INVALID_VALUE;
            }

            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->create() ||
                !device->initSubDevice(info_, maxComputeUnits, create_info)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            device->workerThreadsAffinity_->set(currentMask.getNative());
            // Need to remove this domain type
            device->info_.affinityDomain_.value_ &= negAffinityDomain;
            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
        numSubDevices++;
        // These processors now belong to a sub-device
        affinityMask.clear(currentMask.getNative());
    }

#else // win32
    // First call only retrieves the required buffer size
    DWORD length = 0;
    ::GetLogicalProcessorInformation(NULL, &length);

    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer =
        (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc(length);

    if (buffer != NULL && ::GetLogicalProcessorInformation(buffer, &length)) {
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr, limit =
            &buffer[length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)];

        for (ptr = buffer; ptr < limit; ++ptr) {
            PCACHE_DESCRIPTOR cache = &ptr->Cache;
            if (ptr->Relationship == RelationCache && cache->Type != CacheInstruction) {
                if (cache->Level == cacheLevel) {
                    KAFFINITY affinityMask = (KAFFINITY)ptr->ProcessorMask;
                    if (workerThreadsAffinity_ != NULL) {
                        workerThreadsAffinity_->adjust(0, affinityMask);
                    }
                    if (affinityMask == 0) {
                        continue;
                    }

                    cl_uint maxComputeUnits;
                    if (cacheLevel > 1) {
                        // Count the caches of the next lower level that
                        // share processors with this cache
                        maxComputeUnits = 0;
                        cl_uint cacheLevelSub = cacheLevel - 1;
                        for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION
                                ptrSub = buffer; ptrSub < limit; ++ptrSub) {

                            PCACHE_DESCRIPTOR cacheSub = &ptrSub->Cache;
                            if (ptrSub->Relationship == RelationCache &&
                                cacheSub->Type != CacheInstruction) {
                                if (cacheSub->Level == cacheLevelSub &&
                                    ((affinityMask & (KAFFINITY)ptrSub->ProcessorMask) != 0)) {
                                    ++maxComputeUnits;
                                }
                            }
                        }

                        if (maxComputeUnits == 0) {
                            continue;
                        }
                    }
                    else {
                        maxComputeUnits = 1;
                    }

                    if (devices != NULL) {
                        // Never write past the end of the caller's array
                        if (numSubDevices >= num_entries) {
                            free(buffer);
                            return CL_INVALID_VALUE;
                        }

                        Device* device = new Device(this);
                        if (device == NULL) {
                            free(buffer);
                            return CL_OUT_OF_HOST_MEMORY;
                        }

                        if (!device->create() || !device->initSubDevice(info_,
                                maxComputeUnits, create_info)) {
                            free(buffer);
                            device->release();
                            return CL_OUT_OF_HOST_MEMORY;
                        }

                        device->workerThreadsAffinity_->set(0, affinityMask);
                        // Need to remove this domain type
                        device->info_.affinityDomain_.value_ &= negAffinityDomain;
                        *devices++ = as_cl(static_cast<amd::Device*>(device));
                    }
                    numSubDevices++;
                    if (numSubDevices >= info_.maxComputeUnits_) {
                        break;
                    }
                }
            }
        }
    }

    free(buffer);

#endif

    if (num_devices != NULL) {
        *num_devices = numSubDevices;
    }

    if (numSubDevices == 0) {
        return CL_INVALID_VALUE;
    }

    return CL_SUCCESS;
}
|
|
|
|
// Creates a CPU program object bound to this device.
// 'oclVer' is not used by the CPU implementation.
// Returns the new program; a NULL result is logged and returned as is.
device::Program*
Device::createProgram(int oclVer)
{
    Program* const program = new Program(*this);
    if (NULL == program) {
        LogError("We failed memory allocation for program!");
    }

    return program;
}
|
|
|
|
void*
|
|
Device::allocMapTarget(
|
|
amd::Memory& mem,
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& region,
|
|
uint mapFlags,
|
|
size_t* rowPitch,
|
|
size_t* slicePitch)
|
|
{
|
|
if (mem.asImage() != NULL) {
|
|
amd::Image * image = mem.asImage();
|
|
size_t elementSize = image->getImageFormat().getElementSize();
|
|
size_t rp = image->getRowPitch();
|
|
size_t sp = image->getSlicePitch();
|
|
*rowPitch = rp;
|
|
if (slicePitch) {
|
|
*slicePitch = sp;
|
|
}
|
|
return (address) image->getHostMem()
|
|
+ (origin[0] * elementSize + origin[1] * rp + origin[2] * sp);
|
|
}
|
|
else if (mem.asBuffer() != NULL) {
|
|
return (address) mem.getHostMem() + origin[0];
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
Device::freeMapTarget(amd::Memory& mem, void* target)
|
|
{
|
|
// nop for CPU
|
|
}
|
|
|
|
} // namespace cpu
|