P4 to Git Change 1536925 by vsytchen@vsytchen-ocl-win10 on 2018/04/04 17:20:38

SWDEV-79445 - OCL generic changes and code clean-up

	1. Replace the use of std::map with std::unordered_map to improve lookup/insert time.
	2. Replace the use of std::make_pair and the std::pair constructor with uniform initialization for cleaner code.
	3. Replace explicitly spelled container iterator types (e.g. std::vector<T>::iterator) with the auto keyword for cleaner code.
	4. Use range-based for loops where applicable.

	ReviewBoardURL = http://ocltc.amd.com/reviews/r/14517/diff/

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#57 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#46 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#216 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#297 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#158 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#587 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#322 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#46 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#237 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#242 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#415 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#143 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#46 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/CMakeLists.txt#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#89 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#49 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#102 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#91 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#17 edit


[ROCm/clr commit: d09ca72f74]
このコミットが含まれているのは:
foreman
2018-04-04 18:00:17 -04:00
コミット 8f7df519da
43個のファイルの変更、254行の追加、322行の削除
+2 -2
ファイルの表示
@@ -4,7 +4,7 @@
#ifndef APPPROFILE_HPP_
#define APPPROFILE_HPP_
#include <map>
#include <unordered_map>
#include <string>
namespace amd {
@@ -34,7 +34,7 @@ class AppProfile {
void* data_; //!< Pointer to the data
};
typedef std::map<std::string, PropertyData> DataMap;
typedef std::unordered_map<std::string, PropertyData> DataMap;
DataMap propertyDataMap_;
std::string appFileName_; // without extension
+4 -5
ファイルの表示
@@ -702,9 +702,8 @@ bool Program::compileImpl(const std::string& sourceCode,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
@@ -1028,8 +1027,8 @@ bool Program::linkImpl(const std::vector<device::Program*>& inputPrograms,
#if defined(WITH_ONLINE_COMPILER)
std::vector<std::string*> llvmBinaries(inputPrograms.size());
std::vector<amd::OclElf::oclElfSections> elfSectionType(inputPrograms.size());
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
auto it = inputPrograms.cbegin();
const auto itEnd = inputPrograms.cend();
for (size_t i = 0; it != itEnd; ++it, ++i) {
Program* program = (Program*)*it;
+6 -8
ファイルの表示
@@ -430,10 +430,9 @@ void VirtualCPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd) {
//! Go through ext objects by one and call member function to execute
//! a sequence of external graphics API commands for each external object
for (std::vector<amd::Memory*>::const_iterator itr = cmd.getMemList().begin();
itr != cmd.getMemList().end(); itr++) {
if (*itr) {
bError |= !((*itr)->mapExtObjectInCQThread());
for (const auto& it : cmd.getMemList()) {
if (it) {
bError |= !(it->mapExtObjectInCQThread());
}
}
if (bError) {
@@ -453,10 +452,9 @@ void VirtualCPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& cmd) {
bool bError = false;
for (std::vector<amd::Memory*>::const_iterator itr = cmd.getMemList().begin();
itr != cmd.getMemList().end(); itr++) {
if (*itr) {
bError |= !((*itr)->unmapExtObjectInCQThread());
for (const auto& it : cmd.getMemList()) {
if (it) {
bError |= !(it->unmapExtObjectInCQThread());
}
}
if (bError) {
+15 -17
ファイルの表示
@@ -72,7 +72,7 @@ size_t SvmManager::size() {
void SvmManager::AddSvmBuffer(const void* k, amd::Memory* v) {
amd::ScopedLock lock(AllocatedLock_);
svmBufferMap_.insert(std::pair<uintptr_t, amd::Memory*>(reinterpret_cast<uintptr_t>(k), v));
svmBufferMap_.insert({reinterpret_cast<uintptr_t>(k), v});
}
void SvmManager::RemoveSvmBuffer(const void* k) {
@@ -83,7 +83,7 @@ void SvmManager::RemoveSvmBuffer(const void* k) {
amd::Memory* SvmManager::FindSvmBuffer(const void* k) {
amd::ScopedLock lock(AllocatedLock_);
uintptr_t key = reinterpret_cast<uintptr_t>(k);
std::map<uintptr_t, amd::Memory*>::iterator it = svmBufferMap_.upper_bound(key);
auto it = svmBufferMap_.upper_bound(key);
if (it == svmBufferMap_.begin()) {
return NULL;
}
@@ -320,8 +320,7 @@ device::Memory* Device::findMemoryFromVA(const void* ptr, size_t* offset) const
amd::ScopedLock lk(*vaCacheAccess_);
uintptr_t key = reinterpret_cast<uintptr_t>(ptr);
std::map<uintptr_t, device::Memory*>::iterator it =
vaCacheMap_->upper_bound(reinterpret_cast<uintptr_t>(ptr));
auto it = vaCacheMap_->upper_bound(reinterpret_cast<uintptr_t>(ptr));
if (it == vaCacheMap_->begin()) {
return nullptr;
}
@@ -352,10 +351,10 @@ std::vector<Device*> Device::getDevices(cl_device_type type, bool offlineDevices
}
// Create the list of available devices
for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) {
for (const auto& it : *devices_) {
// Check if the device type is matched
if ((*it)->IsTypeMatching(type, offlineDevices)) {
result.push_back(*it);
if (it->IsTypeMatching(type, offlineDevices)) {
result.push_back(it);
}
}
@@ -369,9 +368,9 @@ size_t Device::numDevices(cl_device_type type, bool offlineDevices) {
return 0;
}
for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) {
for (const auto& it : *devices_) {
// Check if the device type is matched
if ((*it)->IsTypeMatching(type, offlineDevices)) {
if (it->IsTypeMatching(type, offlineDevices)) {
++result;
}
}
@@ -393,7 +392,7 @@ bool Device::getDeviceIDs(cl_device_type deviceType, cl_uint numEntries, cl_devi
return false;
}
std::vector<amd::Device*>::iterator it = ret.begin();
auto it = ret.cbegin();
cl_uint count = std::min(numEntries, (cl_uint)ret.size());
while (count--) {
@@ -707,7 +706,7 @@ void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin,
// Insert into the map if it's the first region
if (++pInfo->count_ == 1) {
writeMapInfo_.insert(std::pair<const void*, WriteMapInfo>(mapAddress, info));
writeMapInfo_.insert({mapAddress, info});
}
}
@@ -729,9 +728,8 @@ Program::~Program() { clear(); }
void Program::clear() {
// Destroy all device kernels
kernels_t::const_iterator it;
for (it = kernels_.begin(); it != kernels_.end(); ++it) {
delete it->second;
for (const auto& it : kernels_) {
delete it.second;
}
kernels_.clear();
}
@@ -1035,8 +1033,8 @@ cl_int Program::build(const std::string& sourceCode, const char* origOptions,
bool Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
const amd::option::Options* linkOptions) {
amd::option::Options compileOptions;
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
auto it = inputPrograms.cbegin();
const auto itEnd = inputPrograms.cend();
for (size_t i = 0; it != itEnd; ++it, ++i) {
Program* program = *it;
@@ -1473,7 +1471,7 @@ bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) {
return true;
}
Program::binary_t ClBinary::data() const { return std::make_pair(binary_, size_); }
Program::binary_t ClBinary::data() const { return {binary_, size_}; }
bool ClBinary::setBinary(const char* theBinary, size_t theBinarySize, bool allocated) {
release();
+6 -7
ファイルの表示
@@ -828,9 +828,9 @@ class Memory : public amd::HeapObject {
//! NB, the map data below is for an API-level map (from clEnqueueMapBuffer),
//! not a physical map. When a memory object does not use USE_HOST_PTR we
//! can use a remote resource and DMA, avoiding the additional CPU memcpy.
amd::Memory* mapMemory_; //!< Memory used as map target buffer
volatile size_t indirectMapCount_; //!< Number of maps
std::map<const void*, WriteMapInfo> writeMapInfo_; //!< Saved write map info for partial unmap
amd::Memory* mapMemory_; //!< Memory used as map target buffer
volatile size_t indirectMapCount_; //!< Number of maps
std::unordered_map<const void*, WriteMapInfo> writeMapInfo_; //!< Saved write map info for partial unmap
//! Increment map count
void incIndMapCount() { ++indirectMapCount_; }
@@ -1017,7 +1017,7 @@ class Kernel : public amd::HeapObject {
class Program : public amd::HeapObject {
public:
typedef std::pair<const void*, size_t> binary_t;
typedef std::map<std::string, Kernel*> kernels_t;
typedef std::unordered_map<std::string, Kernel*> kernels_t;
// type of the program
typedef enum {
TYPE_NONE = 0, // uncompiled
@@ -1347,14 +1347,14 @@ class ClBinary : public amd::HeapObject {
inline const Program::binary_t Program::binary() const {
if (clBinary() == NULL) {
return std::make_pair((const void*)0, 0);
return {(const void*)0, 0};
}
return clBinary()->data();
}
inline Program::binary_t Program::binary() {
if (clBinary() == NULL) {
return std::make_pair((const void*)0, 0);
return {(const void*)0, 0};
}
return clBinary()->data();
}
@@ -1750,7 +1750,6 @@ class Device : public RuntimeObject {
static AppProfile* rocAppProfile_;
#endif
typedef std::vector<Device*>::iterator device_iterator;
static std::vector<Device*>* devices_; //!< All known devices
Device* parent_; //!< This device's parent
+2 -4
ファイルの表示
@@ -11,10 +11,8 @@ namespace gpu {
AppProfile::AppProfile()
: amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) {
propertyDataMap_.insert(DataMap::value_type(
"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)));
propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)});
propertyDataMap_.insert(
DataMap::value_type("OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)});
}
}
+13 -21
ファイルの表示
@@ -73,16 +73,14 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
functionNameMap[] maps from a function name (linkage name in the generated code)
to ElfSymbol_t, which is defined as above.
*/
std::map<std::string, ElfSymbol_t*> functionNameMap;
std::unordered_map<std::string, ElfSymbol_t*> functionNameMap;
// Keep all kernel ILs if -use-debugil is present (gpu debugging)
std::map<std::string, std::string> kernelILs;
std::unordered_map<std::string, std::string> kernelILs;
~TempWrapper() {
std::map<std::string, ElfSymbol_t *>::iterator I, IB = functionNameMap.begin(),
IE = functionNameMap.end();
for (I = IB; I != IE; ++I) {
delete[](*I).second;
for (const auto& it : functionNameMap) {
delete[] it.second;
}
kernelILs.clear();
@@ -177,10 +175,8 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
}
// Append all function metadata to debugIL
std::map<std::string, ElfSymbol_t *>::iterator I, IB = tempObj.functionNameMap.begin(),
IE = tempObj.functionNameMap.end();
for (I = IB; I != IE; ++I) {
ElfSymbol_t* elfsymbol = (*I).second;
for (const auto& it : tempObj.functionNameMap) {
ElfSymbol_t* elfsymbol = it.second;
if (elfsymbol == NULL) {
// Not valid, skip
continue;
@@ -202,11 +198,9 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
}
// Now, patch the IL from debugIL into functionNameMap[]
std::map<std::string, std::string>::iterator KI, KIB = tempObj.kernelILs.begin(),
KIE = tempObj.kernelILs.end();
for (KI = KIB; KI != KIE; ++KI) {
const std::string& kn = (*KI).first;
const std::string& ilstr = (*KI).second;
for (const auto& it : tempObj.kernelILs) {
const std::string& kn = it.first;
const std::string& ilstr = it.second;
ElfSymbol_t* elfsymbol = tempObj.functionNameMap[kn];
if (elfsymbol == NULL) {
@@ -225,10 +219,8 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
bool recompiled = false;
bool hasKernels = false;
std::map<std::string, ElfSymbol_t *>::iterator I, IB = tempObj.functionNameMap.begin(),
IE = tempObj.functionNameMap.end();
for (I = IB; I != IE; ++I) {
ElfSymbol_t* elfsymbol = (*I).second;
for (const auto& it : tempObj.functionNameMap) {
ElfSymbol_t* elfsymbol = it.second;
if (elfsymbol == NULL) {
// Not valid, skip
continue;
@@ -237,7 +229,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
// and the new binary is needed.
if (saveAMDIL() && (elfsymbol->SymInfo[NDX_METADATA].size > 0)) {
std::string fmetadata = "__OpenCL_";
fmetadata.append((*I).first);
fmetadata.append(it.first);
fmetadata.append("_fmetadata");
if (!elfOut()->addSymbol(amd::OclElf::RODATA, fmetadata.c_str(),
@@ -250,7 +242,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
continue;
}
amd::OclElf::SymbolInfo* sinfo = &(elfsymbol->SymInfo[0]);
std::string FName = (*I).first;
std::string FName = it.first;
// For this kernel, get the demangled kernel name, which is used to identify each kernel.
const size_t name_sz = FName.size() - (sizeof(_kernel) - 1) - (sizeof(__OpenCL_) - 1);
+4 -6
ファイルの表示
@@ -74,9 +74,8 @@ bool NullProgram::compileImpl(const std::string& src,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
@@ -355,9 +354,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
+1 -1
ファイルの表示
@@ -1152,7 +1152,7 @@ device::Program* Device::createProgram(amd::option::Options* options) {
}
//! Requested devices list as configured by the GPU_DEVICE_ORDINAL
typedef std::map<int, bool> requestedDevices_t;
typedef std::unordered_map<int, bool> requestedDevices_t;
//! Parses the requested list of devices to be exposed to the user.
static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) {
+8 -9
ファイルの表示
@@ -1349,10 +1349,9 @@ bool Kernel::bindGlobalHwCb(VirtualGPU& gpu, VirtualGPU::GslKernelDesc* desc) co
// Bind HW constant buffers used for the global data store
const Program::HwConstBuffers& gds = prog().glbHwCb();
for (Program::HwConstBuffers::const_iterator it = gds.begin(); (it != gds.end() && result);
++it) {
uint idx = it->first;
result = bindResource(gpu, *(it->second), idx, ConstantBuffer, idx);
for (const auto& it : gds) {
uint idx = it.first;
result = bindResource(gpu, *(it.second), idx, ConstantBuffer, idx);
}
return result;
@@ -1535,16 +1534,16 @@ void Kernel::debug(VirtualGPU& gpu) const {
}
}
const Program::HwConstBuffers& gds = prog().glbHwCb();
for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) {
uint idx = it->first;
for (const auto& it : gds) {
uint idx = it.first;
std::stringstream fileName;
fileName << counter++ << "_kernel_" << name() << "_const" << idx << ".bin";
stubWrite.open(fileName.str().c_str(), (std::fstream::out | std::fstream::binary));
if (stubWrite.is_open()) {
address memory = reinterpret_cast<address>((it->second)->map(&gpu, Resource::ReadOnly));
address memory = reinterpret_cast<address>(it.second->map(&gpu, Resource::ReadOnly));
// Check if we have OpenCL program
stubWrite.write(reinterpret_cast<char*>(memory), (it->second)->size());
(it->second)->unmap(&gpu);
stubWrite.write(reinterpret_cast<char*>(memory), it.second->size());
it.second->unmap(&gpu);
stubWrite.close();
}
}
+3 -5
ファイルの表示
@@ -543,7 +543,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) {
// First DWORD = Offset to where next information is to
// be written, initialized to 0
// Second DWORD = Number of bytes available for printf data
// = buffer size 2*sizeof(uint32_t)
// = buffer size 2*sizeof(uint32_t)
const uint8_t initSize = 2 * sizeof(uint32_t);
uint8_t sysMem[initSize];
memset(sysMem, 0, initSize);
@@ -601,8 +601,6 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
return false;
}
std::vector<uint>::const_iterator ita;
uint sb = 0;
uint sbt = 0;
@@ -614,8 +612,8 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
}
const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
sb += sizeof(uint32_t);
for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) {
sb += *ita;
for (const auto& it : info.arguments_) {
sb += it;
}
if (sbt + sb > bufSize) {
+11 -12
ファイルの表示
@@ -318,7 +318,7 @@ bool NullProgram::linkImpl(amd::option::Options* options) {
std::string metadataStr;
std::vector<ILFunc*> notCalled;
std::vector<ILFunc*> called;
std::map<int, const char**> macros;
std::unordered_map<int, const char**> macros;
size_t j;
Kernel::InitData initData = {0};
@@ -464,8 +464,8 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
amd::option::Options* options, bool createLibrary) {
std::vector<std::string*> llvmBinaries(inputPrograms.size());
std::vector<amd::OclElf::oclElfSections> elfSectionType(inputPrograms.size());
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
auto it = inputPrograms.cbegin();
const auto itEnd = inputPrograms.cend();
for (size_t i = 0; it != itEnd; ++it, ++i) {
NullProgram* program = (NullProgram*)*it;
@@ -682,7 +682,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
std::string metadataStr;
std::vector<ILFunc*> notCalled;
std::vector<ILFunc*> called;
std::map<int, const char**> macros;
std::unordered_map<int, const char**> macros;
size_t j;
Kernel::InitData initData = {0};
@@ -1433,7 +1433,7 @@ NullKernel* NullProgram::createKernel(const std::string& name, const Kernel::Ini
}
// Invoked from ClBinary
bool NullProgram::getAllKernelILs(std::map<std::string, std::string>& allKernelILs,
bool NullProgram::getAllKernelILs(std::unordered_map<std::string, std::string>& allKernelILs,
std::string& programIL, const char* ilKernelName) {
llvm::CompUnit compunit(programIL);
if (ilKernelName != NULL) {
@@ -1471,8 +1471,8 @@ bool NullProgram::createBinary(amd::option::Options* options) {
Program::~Program() {
// Destroy the global HW constant buffers
const Program::HwConstBuffers& gds = glbHwCb();
for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) {
delete it->second;
for (const auto& it : gds) {
delete it.second;
}
// Destroy the global data store
@@ -1634,8 +1634,8 @@ bool HSAILProgram::finiBuild(bool isBuildGood) {
bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
amd::option::Options* options, bool createLibrary) {
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
auto it = inputPrograms.cbegin();
const auto itEnd = inputPrograms.cend();
acl_error errorCode;
// For each program we need to extract the LLVMIR and create
@@ -2037,13 +2037,12 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
}
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
delete [] kernelNames;
std::vector<std::string>::iterator it = vKernels.begin();
bool dynamicParallelism = false;
aclMetadata md;
md.numHiddenKernelArgs = 0;
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
for (it; it != vKernels.end(); ++it) {
std::string kernelName(*it);
for (const auto& it : vKernels) {
std::string kernelName(it);
std::string openclKernelName = Kernel::openclMangledName(kernelName);
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
openclKernelName.c_str(), &md.numHiddenKernelArgs,
+2 -2
ファイルの表示
@@ -256,7 +256,7 @@ class NullProgram : public device::Program {
/*! Get all per-kernel IL from programIL, where programIL is the IL for the
* whole compilation unit.
*/
bool getAllKernelILs(std::map<std::string, std::string>& allKernelILs, std::string& programIL,
bool getAllKernelILs(std::unordered_map<std::string, std::string>& allKernelILs, std::string& programIL,
const char* ilKernelName);
protected:
@@ -322,7 +322,7 @@ class Program : public NullProgram {
size_t binarySize = 0 //!< the machine code size
);
typedef std::map<uint, gpu::Memory*> HwConstBuffers;
typedef std::unordered_map<uint, gpu::Memory*> HwConstBuffers;
//! Global HW constant buffers
const HwConstBuffers& glbHwCb() const { return constBufs_; }
+1 -1
ファイルの表示
@@ -1744,7 +1744,7 @@ bool ResourceCache::addCalResource(Resource::CalResourceDesc* desc, GslResourceR
memcpy(descCached, desc, sizeof(Resource::CalResourceDesc));
// Add the current resource to the cache
resCache_.push_front(std::make_pair(descCached, ref));
resCache_.push_front({descCached, ref});
cacheSize_ += size;
result = true;
}
+19 -22
ファイルの表示
@@ -545,9 +545,9 @@ VirtualGPU::~VirtualGPU() {
uint i;
// Destroy all kernels
for (GslKernels::const_iterator it = gslKernels_.begin(); it != gslKernels_.end(); ++it) {
if (it->first != 0) {
freeKernelDesc(it->second);
for (const auto& it : gslKernels_) {
if (it.first != 0) {
freeKernelDesc(it.second);
}
}
gslKernels_.clear();
@@ -1365,10 +1365,9 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand& vcmd) {
profilingBegin(vcmd, true);
std::vector<amd::Memory*>::const_iterator itr;
for (itr = vcmd.memObjects().begin(); itr != vcmd.memObjects().end(); ++itr) {
for (const auto& it : vcmd.memObjects()) {
// Find device memory
gpu::Memory* memory = dev().getGpuMemory(*itr);
gpu::Memory* memory = dev().getGpuMemory(it);
if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) {
memory->mgpuCacheWriteBack();
@@ -2016,7 +2015,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) {
// Loop through all outstanding command batches
while (!cbList_.empty()) {
CommandBatchList::const_iterator it = cbList_.begin();
const auto it = cbList_.cbegin();
// Wait for completion
foundEvent = awaitCompletion(*it, vcmd.waitingEvent());
// Release a command batch
@@ -2210,8 +2209,8 @@ void VirtualGPU::submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand&
const size_t memObjSize = cmd.getMemoryObjectSize();
const std::vector<amd::Memory*>& memObj = cmd.getMemList();
size_t se = 0;
for (std::vector<amd::Memory *>::const_iterator itMemObj = memObj.begin();
itMemObj != memObj.end(); ++itMemObj, ++se) {
for (auto itMemObj = memObj.cbegin();
itMemObj != memObj.cend(); ++itMemObj, ++se) {
// Find GSL Mem Object
gslMemObject gslMemObj = dev().getGpuMemory(*itMemObj)->gslResource();
@@ -2297,15 +2296,14 @@ void VirtualGPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& vcmd) {
profilingBegin(vcmd);
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
it != vcmd.getMemList().end(); ++it) {
for (const auto& it : vcmd.getMemList()) {
// amd::Memory object should never be NULL
assert(*it && "Memory object for interop is NULL");
gpu::Memory* memory = dev().getGpuMemory(*it);
assert(it && "Memory object for interop is NULL");
gpu::Memory* memory = dev().getGpuMemory(it);
// If resource is a shared copy of original resource, then
// runtime needs to copy data from original resource
(*it)->getInteropObj()->copyOrigToShared();
it->getInteropObj()->copyOrigToShared();
// Check if OpenCL has direct access to the interop memory
if (memory->interopType() == Memory::InteropDirectAccess) {
@@ -2336,11 +2334,10 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
profilingBegin(vcmd);
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
it != vcmd.getMemList().end(); ++it) {
for (const auto& it : vcmd.getMemList()) {
// amd::Memory object should never be NULL
assert(*it && "Memory object for interop is NULL");
gpu::Memory* memory = dev().getGpuMemory(*it);
assert(it && "Memory object for interop is NULL");
gpu::Memory* memory = dev().getGpuMemory(it);
// Check if we can use HW interop
if (memory->interopType() == Memory::InteropHwEmulation) {
@@ -2362,7 +2359,7 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
// If resource is a shared copy of original resource, then
// runtime needs to copy data back to original resource
(*it)->getInteropObj()->copySharedToOrig();
it->getInteropObj()->copySharedToOrig();
}
profilingEnd(vcmd);
@@ -2513,7 +2510,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
wait |= state_.forceWait_;
// Loop through all outstanding command batches
while (!cbList_.empty()) {
CommandBatchList::const_iterator it = cbList_.begin();
const auto it = cbList_.cbegin();
// Check if command batch finished without a wait
bool finished = true;
for (uint i = 0; i < AllEngines; ++i) {
@@ -2537,8 +2534,8 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); }
void VirtualGPU::releaseMemObjects(bool scratch) {
for (GpuEvents::const_iterator it = gpuEvents_.begin(); it != gpuEvents_.end(); ++it) {
GpuEvent event = it->second;
for (const auto& it : gpuEvents_) {
GpuEvent event = it.second;
waitForEvent(&event);
}
// Unbind all resources.So the queue won't have any bound mem objects
+2 -2
ファイルの表示
@@ -380,8 +380,8 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext {
) const;
private:
typedef std::map<CALimage, GslKernelDesc*> GslKernels;
typedef std::map<gslMemObject, GpuEvent> GpuEvents;
typedef std::unordered_map<CALimage, GslKernelDesc*> GslKernels;
typedef std::unordered_map<gslMemObject, GpuEvent> GpuEvents;
//! Finds total amount of necessary iterations
inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes
+2 -4
ファイルの表示
@@ -11,10 +11,8 @@ namespace pal {
AppProfile::AppProfile()
: amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) {
propertyDataMap_.insert(DataMap::value_type(
"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)));
propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)});
propertyDataMap_.insert(
DataMap::value_type("OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)});
}
}
+6 -8
ファイルの表示
@@ -65,9 +65,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
@@ -282,9 +281,8 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
@@ -343,10 +341,10 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
case 100:
case 110:
case 120:
hdr = std::make_pair(opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size);
hdr = {opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size};
break;
case 200:
hdr = std::make_pair(opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size);
hdr = {opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size};
break;
default:
buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
+1 -1
ファイルの表示
@@ -1081,7 +1081,7 @@ device::Program* Device::createProgram(amd::option::Options* options) {
}
//! Requested devices list as configured by the GPU_DEVICE_ORDINAL
typedef std::map<int, bool> requestedDevices_t;
typedef std::unordered_map<int, bool> requestedDevices_t;
//! Parses the requested list of devices to be exposed to the user.
static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) {
+3 -4
ファイルの表示
@@ -539,7 +539,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) {
// First DWORD = Offset to where next information is to
// be written, initialized to 0
// Second DWORD = Number of bytes available for printf data
// = buffer size 2*sizeof(uint32_t)
// = buffer size 2*sizeof(uint32_t)
const uint8_t initSize = 2 * sizeof(uint32_t);
uint8_t sysMem[initSize];
memset(sysMem, 0, initSize);
@@ -597,7 +597,6 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
return false;
}
std::vector<uint>::const_iterator ita;
uint sb = 0;
uint sbt = 0;
@@ -609,8 +608,8 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
}
const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
sb += sizeof(uint32_t);
for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) {
sb += *ita;
for (const auto& it : info.arguments_) {
sb += it;
}
if (sbt + sb > bufSize) {
+4 -5
ファイルの表示
@@ -228,8 +228,8 @@ bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
assert(!"Should not reach here");
return false;
#else // !defined(WITH_LIGHTNING_COMPILER)
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
auto it = inputPrograms.cbegin();
const auto itEnd = inputPrograms.cend();
acl_error errorCode;
// For each program we need to extract the LLVMIR and create
@@ -656,10 +656,9 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
}
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
delete [] kernelNames;
std::vector<std::string>::iterator it = vKernels.begin();
bool dynamicParallelism = false;
for (it; it != vKernels.end(); ++it) {
std::string kernelName(*it);
for (const auto& it : vKernels) {
std::string kernelName(it);
std::string openclKernelName = device::Kernel::openclMangledName(kernelName);
HSAILKernel* aKernel =
+2 -2
ファイルの表示
@@ -1133,7 +1133,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
Pal::GpuMemoryCreateInfo createInfo = {};
createInfo.size = desc().width_ * elementSize_;
createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
createInfo.alignment = MaxGpuAlignment;
createInfo.alignment = desc().scratch_ ? 64*Ki : MaxGpuAlignment;
createInfo.vaRange = Pal::VaRange::Default;
createInfo.priority = Pal::GpuMemPriority::Normal;
@@ -1970,7 +1970,7 @@ bool ResourceCache::addGpuMemory(Resource::Descriptor* desc,
amd::ScopedLock l(&lockCacheOps_);
// Add the current resource to the cache
resCache_.push_front(std::make_pair(descCached, ref));
resCache_.push_front({descCached, ref});
ref->gpu_ = nullptr;
cacheSize_ += size;
result = true;
+10 -13
ファイルの表示
@@ -1802,10 +1802,9 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand& vcmd) {
profilingBegin(vcmd, true);
std::vector<amd::Memory*>::const_iterator itr;
for (itr = vcmd.memObjects().begin(); itr != vcmd.memObjects().end(); ++itr) {
for (const auto& it : vcmd.memObjects()) {
// Find device memory
pal::Memory* memory = dev().getGpuMemory(*itr);
pal::Memory* memory = dev().getGpuMemory(it);
if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) {
memory->mgpuCacheWriteBack();
@@ -2478,15 +2477,14 @@ void VirtualGPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& vcmd) {
profilingBegin(vcmd);
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
it != vcmd.getMemList().end(); ++it) {
for (const auto& it : vcmd.getMemList()) {
// amd::Memory object should never be nullptr
assert(*it && "Memory object for interop is nullptr");
pal::Memory* memory = dev().getGpuMemory(*it);
assert(it && "Memory object for interop is nullptr");
pal::Memory* memory = dev().getGpuMemory(it);
// If resource is a shared copy of original resource, then
// runtime needs to copy data from original resource
(*it)->getInteropObj()->copyOrigToShared();
it->getInteropObj()->copyOrigToShared();
// Check if OpenCL has direct access to the interop memory
if (memory->interopType() == Memory::InteropDirectAccess) {
@@ -2517,11 +2515,10 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
profilingBegin(vcmd);
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
it != vcmd.getMemList().end(); ++it) {
for (const auto& it : vcmd.getMemList()) {
// amd::Memory object should never be nullptr
assert(*it && "Memory object for interop is nullptr");
pal::Memory* memory = dev().getGpuMemory(*it);
assert(it && "Memory object for interop is nullptr");
pal::Memory* memory = dev().getGpuMemory(it);
// Check if we can use HW interop
if (memory->interopType() == Memory::InteropHwEmulation) {
@@ -2543,7 +2540,7 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
// If resource is a shared copy of original resource, then
// runtime needs to copy data back to original resource
(*it)->getInteropObj()->copySharedToOrig();
it->getInteropObj()->copySharedToOrig();
}
profilingEnd(vcmd);
+1 -1
ファイルの表示
@@ -163,7 +163,7 @@ class VirtualGPU : public device::VirtualDevice {
uint cmdBufIdCurrent_; //!< Current global command buffer ID
uint cmbBufIdRetired_; //!< The last retired command buffer ID
uint cmdCnt_; //!< Counter of commands
std::map<GpuMemoryReference*, uint> memReferences_;
std::unordered_map<GpuMemoryReference*, uint> memReferences_;
Util::VirtualLinearAllocator vlAlloc_;
std::vector<Pal::GpuMemoryRef> palMemRefs_;
std::vector<Pal::IGpuMemory*> palMems_;
+4 -4
ファイルの表示
@@ -51,19 +51,19 @@ foreach(AMDGCN_LIB_TARGET ${AMDGCN_LIB_TARGETS})
if (${AMDGCN_LIB_TARGET} MATCHES "^oclc_isa_version_[0-9]+_lib$")
string(REGEX REPLACE "^oclc_isa_version_([0-9]+)_lib$" "\\1" gfxip ${AMDGCN_LIB_TARGET})
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc
"case ${gfxip}: return std::make_pair(oclc_isa_version_${gfxip}_amdgcn, oclc_isa_version_${gfxip}_amdgcn_size); break;\n")
"case ${gfxip}: return {oclc_isa_version_${gfxip}_amdgcn, oclc_isa_version_${gfxip}_amdgcn_size}; break;\n")
endif()
endforeach()
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc
"default: return std::make_pair((const void*)0,(size_t)0);\n}\n}\n")
"default: return {(const void*)0,(size_t)0};\n}\n}\n")
foreach(AMDGCN_LIB_TARGET ${AMDGCN_LIB_TARGETS})
if (${AMDGCN_LIB_TARGET} MATCHES "oclc_(.*)_on_lib")
string(REGEX REPLACE "oclc_(.*)_on_lib" "\\1" function ${AMDGCN_LIB_TARGET})
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc
"static inline std::pair<const char*, size_t> get_oclc_${function}(bool on)\n{ return std::make_pair("
"static inline std::pair<const char*, size_t> get_oclc_${function}(bool on)\n{ return {"
"(const char*)(on ? oclc_${function}_on_amdgcn : oclc_${function}_off_amdgcn),"
"on ? oclc_${function}_on_amdgcn_size : oclc_${function}_off_amdgcn_size);}\n")
"on ? oclc_${function}_on_amdgcn_size : oclc_${function}_off_amdgcn_size};}\n")
endif()
endforeach()
+1 -2
ファイルの表示
@@ -183,8 +183,7 @@ void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr)
flags, &buf_size, &ptr, nullptr, nullptr)) {
// Ask GPUPro driver to provide CPU access to allocation
if (0 == Funcs().AmdgpuBoCpuMap(buf_handle, host_ptr)) {
allocs_.insert(std::pair<void*, std::pair<amdgpu_bo_handle, uint32_t>>(
ptr, std::pair<amdgpu_bo_handle, uint32_t>(buf_handle, shared_handle)));
allocs_.insert({ptr, {buf_handle, shared_handle}});
}
else {
hsa_amd_interop_unmap_buffer(ptr);
+2 -2
ファイルの表示
@@ -9,7 +9,7 @@
#include "profuncs.hpp"
#include "prodriver.hpp"
#include "thread/monitor.hpp"
#include <map>
#include <unordered_map>
/*! \addtogroup HSA
* @{
@@ -53,7 +53,7 @@ private:
amdgpu_device_handle dev_handle_; //!< AMD gpu device handle
amdgpu_gpu_info gpu_info_; //!< GPU info structure
amdgpu_heap_info heap_info_; //!< Information about memory
mutable std::map<void*, std::pair<amdgpu_bo_handle, uint32_t>> allocs_; //!< Alloced memory mapping
mutable std::unordered_map<void*, std::pair<amdgpu_bo_handle, uint32_t>> allocs_; //!< Alloced memory mapping
amd::Monitor* alloc_ops_; //!< Serializes memory allocations/destructions
};
+1 -1
ファイルの表示
@@ -10,7 +10,7 @@
namespace roc {
typedef std::map<std::string, device::Kernel*> NameKernelMap;
typedef std::unordered_map<std::string, device::Kernel*> NameKernelMap;
class ClBinary : public device::ClBinary {
public:
+6 -9
ファイルの表示
@@ -7,7 +7,6 @@
#include <sstream>
#include <fstream>
#include <iostream>
#include <iterator>
#include "os/os.hpp"
#include "rocdevice.hpp"
@@ -80,9 +79,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
@@ -249,9 +247,8 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
it != end; ++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
@@ -309,10 +306,10 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
case 100:
case 110:
case 120:
hdr = std::make_pair(opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size);
hdr = {opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size};
break;
case 200:
hdr = std::make_pair(opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size);
hdr = {opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size};
break;
default:
buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
+5 -6
ファイルの表示
@@ -336,12 +336,11 @@ uint64_t PerfCounter::getInfo(uint64_t infoType) const {
&data);
uint64_t result = 0;
std::vector<hsa_ven_amd_aqlprofile_info_data_t>::iterator it;
for (it = data.begin(); it != data.end(); ++it) {
if (it->pmc_data.event.block_name == event_.block_name &&
it->pmc_data.event.block_index == event_.block_index &&
it->pmc_data.event.counter_id == event_.counter_id) {
result += it->pmc_data.result;
for (const auto& it : data) {
if (it.pmc_data.event.block_name == event_.block_name &&
it.pmc_data.event.block_index == event_.block_index &&
it.pmc_data.event.counter_id == event_.counter_id) {
result += it.pmc_data.result;
}
}
return result;
+2 -3
ファイルの表示
@@ -415,7 +415,6 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled,
return false;
}
std::vector<uint>::const_iterator ita;
uint sb = 0;
uint sbt = 0;
@@ -427,8 +426,8 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled,
}
const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
sb += sizeof(uint32_t);
for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) {
sb += *ita;
for (const auto& ita : info.arguments_) {
sb += ita;
}
size_t idx = 1;
+2 -3
ファイルの表示
@@ -24,7 +24,6 @@
#include <fstream>
#include <sstream>
#include <iostream>
#include <iterator>
namespace roc {
@@ -535,8 +534,8 @@ aclType HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& compl
bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
amd::option::Options* options, bool createLibrary) {
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
auto it = inputPrograms.cbegin();
const auto itEnd = inputPrograms.cend();
acl_error errorCode;
// For each program we need to extract the LLVMIR and create
+14 -19
ファイルの表示
@@ -370,11 +370,10 @@ void UnmapMemoryCommand::releaseResources() {
bool MigrateMemObjectsCommand::validateMemory() {
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
std::vector<amd::Memory*>::const_iterator itr;
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
for (const auto& it : memObjects_) {
device::Memory* mem = it->getDeviceMemory(queue()->device());
if (NULL == mem) {
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
return false;
}
}
@@ -434,11 +433,10 @@ cl_int NDRangeKernelCommand::validateMemory() {
bool ExtObjectsCommand::validateMemory() {
bool retVal = true;
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
itr != memObjects_.end(); itr++) {
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
for (const auto& it : memObjects_) {
device::Memory* mem = it->getDeviceMemory(queue()->device());
if (NULL == mem) {
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
return false;
}
retVal = processGLResource(mem);
@@ -457,11 +455,10 @@ bool ReleaseExtObjectsCommand::processGLResource(device::Memory* mem) {
bool MakeBuffersResidentCommand::validateMemory() {
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
itr != memObjects_.end(); itr++) {
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
for (const auto& it : memObjects_) {
device::Memory* mem = it->getDeviceMemory(queue()->device());
if (NULL == mem) {
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
return false;
}
}
@@ -471,16 +468,14 @@ bool MakeBuffersResidentCommand::validateMemory() {
}
bool ThreadTraceMemObjectsCommand::validateMemory() {
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
itr != memObjects_.end(); itr++) {
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
for (auto& it = memObjects_.cbegin(); it != memObjects_.cend(); it++) {
device::Memory* mem = (*it)->getDeviceMemory(queue()->device());
if (NULL == mem) {
std::vector<amd::Memory*>::const_iterator tmpItr;
for (tmpItr = memObjects_.begin(); tmpItr != itr; tmpItr++) {
device::Memory* tmpMem = (*tmpItr)->getDeviceMemory(queue()->device());
for (auto& tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) {
device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device());
delete tmpMem;
}
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize());
return false;
}
}
+17 -24
ファイルの表示
@@ -720,10 +720,9 @@ class MigrateMemObjectsCommand : public Command {
const std::vector<amd::Memory*>& memObjects,
cl_mem_migration_flags flags)
: Command(queue, type, eventWaitList), migrationFlags_(flags) {
std::vector<amd::Memory*>::const_iterator itr;
for (itr = memObjects.begin(); itr != memObjects.end(); itr++) {
(*itr)->retain();
memObjects_.push_back(*itr);
for (const auto& it : memObjects) {
it->retain();
memObjects_.push_back(it);
}
}
@@ -731,9 +730,8 @@ class MigrateMemObjectsCommand : public Command {
//! Release all resources associated with this command
void releaseResources() {
std::vector<amd::Memory*>::const_iterator itr;
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
(*itr)->release();
for (const auto& it : memObjects_) {
it->release();
}
Command::releaseResources();
}
@@ -837,18 +835,16 @@ class ExtObjectsCommand : public Command {
ExtObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects,
const std::vector<amd::Memory*>& memoryObjects, cl_command_type type)
: Command(queue, type, eventWaitList) {
for (std::vector<amd::Memory*>::const_iterator itr = memoryObjects.begin();
itr != memoryObjects.end(); itr++) {
(*itr)->retain();
memObjects_.push_back(*itr);
for (const auto& it : memoryObjects) {
it->retain();
memObjects_.push_back(it);
}
}
//! Release all resources associated with this command
void releaseResources() {
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
itr != memObjects_.end(); itr++) {
(*itr)->release();
for (const auto& it : memObjects_) {
it->release();
}
Command::releaseResources();
}
@@ -954,9 +950,8 @@ class ThreadTraceMemObjectsCommand : public Command {
//! Release all resources associated with this command
void releaseResources() {
threadTrace_.release();
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
itr != memObjects_.end(); itr++) {
(*itr)->release();
for (const auto& itr : memObjects_) {
itr->release();
}
Command::releaseResources();
}
@@ -1067,19 +1062,17 @@ class MakeBuffersResidentCommand : public Command {
const std::vector<amd::Memory*>& memObjects,
cl_bus_address_amd* busAddr)
: Command(queue, type, eventWaitList), busAddresses_(busAddr) {
std::vector<amd::Memory*>::const_iterator itr;
for (itr = memObjects.begin(); itr != memObjects.end(); itr++) {
(*itr)->retain();
memObjects_.push_back(*itr);
for (const auto& it : memObjects) {
it->retain();
memObjects_.push_back(it);
}
}
virtual void submit(device::VirtualDevice& device) { device.submitMakeBuffersResident(*this); }
void releaseResources() {
std::vector<amd::Memory*>::const_iterator itr;
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
(*itr)->release();
for (const auto& it : memObjects_) {
it->release();
}
Command::releaseResources();
}
+3 -4
ファイルの表示
@@ -96,15 +96,14 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
// Process the command's event wait list.
const Command::EventWaitList& events = command->eventWaitList();
Command::EventWaitList::const_iterator it;
bool dependencyFailed = false;
for (it = events.begin(); it != events.end(); ++it) {
for (const auto& it : events) {
// Only wait if the command is enqueued into another queue.
if ((*it)->command().queue() != this) {
if (it->command().queue() != this) {
virtualDevice->flush(head, true);
tail = head = NULL;
dependencyFailed |= !(*it)->awaitCompletion();
dependencyFailed |= !it->awaitCompletion();
}
}
+8 -11
ファイルの表示
@@ -62,10 +62,9 @@ Context::~Context() {
// Dissociate OCL context with any external device
if (info_.flags_ & (GLDeviceKhr | D3D10DeviceKhr | D3D11DeviceKhr)) {
std::vector<Device*>::const_iterator it;
// Loop through all devices
for (it = devices_.begin(); it != devices_.end(); it++) {
(*it)->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY);
for (const auto& it : devices_) {
it->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY);
}
}
@@ -218,10 +217,9 @@ int Context::create(const intptr_t* properties) {
// Check if OCL context can be associated with any external device
if (info_.flags_ & (D3D10DeviceKhr | D3D11DeviceKhr | GLDeviceKhr | D3D9DeviceKhr |
D3D9DeviceEXKhr | D3D9DeviceVAKhr)) {
std::vector<Device*>::const_iterator it;
// Loop through all devices
for (it = devices_.begin(); it != devices_.end(); it++) {
if (!(*it)->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) {
for (const auto& it : devices_) {
if (!it->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) {
result = CL_INVALID_VALUE;
}
}
@@ -331,10 +329,9 @@ void Context::svmFree(void* ptr) const {
}
bool Context::containsDevice(const Device* device) const {
std::vector<Device*>::const_iterator it;
for (it = devices_.begin(); it != devices_.end(); ++it) {
if (device == *it || (*it)->isAncestor(device)) {
for (const auto& it : devices_) {
if (device == it || it->isAncestor(device)) {
return true;
}
}
@@ -342,8 +339,8 @@ bool Context::containsDevice(const Device* device) const {
}
DeviceQueue* Context::defDeviceQueue(const Device& dev) const {
std::map<const Device*, DeviceQueueInfo>::const_iterator it = deviceQueues_.find(&dev);
if (it != deviceQueues_.end()) {
const auto it = deviceQueues_.find(&dev);
if (it != deviceQueues_.cend()) {
return it->second.defDeviceQueue_;
} else {
return NULL;
+3 -3
ファイルの表示
@@ -11,7 +11,7 @@
#include "platform/agent.hpp"
#include <vector>
#include <map>
#include <unordered_map>
namespace amd {
@@ -197,8 +197,8 @@ class Context : public RuntimeObject {
GLFunctions* glenv_; //!< OpenGL context
Device* customHostAllocDevice_; //!< Device responsible for host allocations
std::vector<Device*> svmAllocDevice_; //!< Devices can support SVM allocations
std::map<const Device*, DeviceQueueInfo> deviceQueues_; //!< Device queues mapping
mutable Monitor ctxLock_; //!< Lock for the context access
std::unordered_map<const Device*, DeviceQueueInfo> deviceQueues_; //!< Device queues mapping
mutable Monitor ctxLock_; //!< Lock for the context access
};
/*! @}
+2 -1
ファイルの表示
@@ -18,6 +18,7 @@
#include <vector>
#include <list>
#include <map>
#include <unordered_map>
namespace device {
class Memory;
@@ -124,7 +125,7 @@ class Memory : public amd::RuntimeObject {
DeviceMemory* deviceMemories_;
//! The device alloced state
std::map<const Device*, AllocState> deviceAlloced_;
std::unordered_map<const Device*, AllocState> deviceAlloced_;
//! Linked list of destructor callbacks.
std::atomic<DestructorCallBackEntry*> destructorCallbacks_;
+1 -1
ファイルの表示
@@ -24,7 +24,7 @@ namespace amd {
*/
class PerfCounter : public RuntimeObject {
public:
typedef std::map<cl_perfcounter_property, ulong> Properties;
typedef std::unordered_map<cl_perfcounter_property, ulong> Properties;
//! Constructor of the performance counter object
PerfCounter(const Device& device, //!< device object
+46 -58
ファイルの表示
@@ -21,13 +21,12 @@ namespace amd {
Program::~Program() {
// Destroy all device programs
deviceprograms_t::const_iterator it, itEnd;
for (it = devicePrograms_.begin(), itEnd = devicePrograms_.end(); it != itEnd; ++it) {
delete it->second;
for (const auto& it : devicePrograms_) {
delete it.second;
}
for (devicebinary_t::const_iterator IT = binary_.begin(), IE = binary_.end(); IT != IE; ++IT) {
const binary_t& Bin = IT->second;
for (const auto& it : binary_) {
const binary_t& Bin = it.second;
if (Bin.first) {
delete[] Bin.first;
}
@@ -43,8 +42,8 @@ const Symbol* Program::findSymbol(const char* kernelName) const {
return NULL;
}
symbols_t::const_iterator it = symbolTable_->find(kernelName);
return (it == symbolTable_->end()) ? NULL : &it->second;
const auto it = symbolTable_->find(kernelName);
return (it == symbolTable_->cend()) ? NULL : &it->second;
}
cl_int Program::addDeviceProgram(Device& device, const void* image, size_t length,
@@ -151,8 +150,8 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt
}
device::Program* Program::getDeviceProgram(const Device& device) const {
deviceprograms_t::const_iterator it = devicePrograms_.find(&device.rootDevice());
if (it == devicePrograms_.end()) {
const auto it = devicePrograms_.find(&device.rootDevice());
if (it == devicePrograms_.cend()) {
return NULL;
}
return it->second;
@@ -198,16 +197,15 @@ cl_int Program::compile(const std::vector<Device*>& devices, size_t numHeaders,
}
// Compile the program programs associated with the given devices.
std::vector<Device*>::const_iterator it;
for (it = devices.begin(); it != devices.end(); ++it) {
device::Program* devProgram = getDeviceProgram(**it);
for (const auto& it : devices) {
device::Program* devProgram = getDeviceProgram(*it);
if (devProgram == NULL) {
const binary_t& bin = binary(**it);
retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
const binary_t& bin = binary(*it);
retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
devProgram = getDeviceProgram(**it);
devProgram = getDeviceProgram(*it);
}
if (devProgram->type() == device::Program::TYPE_INTERMEDIATE || language_ == SPIRV) {
@@ -277,8 +275,7 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
}
// Link the program programs associated with the given devices.
std::vector<Device*>::const_iterator it;
for (it = devices.begin(); it != devices.end(); ++it) {
for (const auto& it : devices) {
// find the corresponding device program in each input program
std::vector<device::Program*> inputDevPrograms(numInputs);
bool found = false;
@@ -288,8 +285,8 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
parsedOptions.oVariables->BinaryIsSpirv = true;
}
deviceprograms_t inputDevProgs = inputProgram.devicePrograms();
deviceprograms_t::const_iterator findIt = inputDevProgs.find(*it);
if (findIt == inputDevProgs.end()) {
const auto findIt = inputDevProgs.find(it);
if (findIt == inputDevProgs.cend()) {
if (found) break;
continue;
}
@@ -328,14 +325,14 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
return CL_INVALID_VALUE;
}
device::Program* devProgram = getDeviceProgram(**it);
device::Program* devProgram = getDeviceProgram(*it);
if (devProgram == NULL) {
const binary_t& bin = binary(**it);
retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
const binary_t& bin = binary(*it);
retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
devProgram = getDeviceProgram(**it);
devProgram = getDeviceProgram(*it);
}
// We only build a Device-Program once
@@ -359,16 +356,14 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
}
// Rebuild the symbol table
deviceprograms_t::iterator sit;
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
const Device& device = *sit->first;
const device::Program& program = *sit->second;
for (const auto& sit : devicePrograms_) {
const Device& device = *(sit.first);
const device::Program& program = *(sit.second);
const device::Program::kernels_t& kernels = program.kernels();
device::Program::kernels_t::const_iterator kit;
for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
const std::string& name = kit->first;
const device::Kernel* devKernel = kit->second;
for (const auto& it : kernels) {
const std::string& name = it.first;
const device::Kernel* devKernel = it.second;
Symbol& symbol = (*symbolTable_)[name];
if (!symbol.setDeviceKernel(device, devKernel)) {
@@ -379,9 +374,8 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
// Create a string with all kernel names from the program
if (kernelNames_.length() == 0) {
amd::Program::symbols_t::const_iterator it;
for (it = symbols().begin(); it != symbols().end(); ++it) {
if (it != symbols().begin()) {
for (auto it = symbols().cbegin(); it != symbols().cend(); ++it) {
if (it != symbols().cbegin()) {
kernelNames_.append(1, ';');
}
kernelNames_.append(it->first.c_str());
@@ -474,20 +468,19 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
}
// Build the program programs associated with the given devices.
std::vector<Device*>::const_iterator it;
for (it = devices.begin(); it != devices.end(); ++it) {
device::Program* devProgram = getDeviceProgram(**it);
for (const auto& it : devices) {
device::Program* devProgram = getDeviceProgram(*it);
if (devProgram == NULL) {
const binary_t& bin = binary(**it);
const binary_t& bin = binary(*it);
if (sourceCode_.empty() && (bin.first == NULL)) {
retval = false;
continue;
}
retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
devProgram = getDeviceProgram(**it);
devProgram = getDeviceProgram(*it);
}
parsedOptions.oVariables->AssumeAlias = true;
@@ -518,16 +511,14 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
}
// Rebuild the symbol table
deviceprograms_t::iterator sit;
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
const Device& device = *sit->first;
const device::Program& program = *sit->second;
for (const auto& it : devicePrograms_) {
const Device& device = *(it.first);
const device::Program& program = *(it.second);
const device::Program::kernels_t& kernels = program.kernels();
device::Program::kernels_t::const_iterator kit;
for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
const std::string& name = kit->first;
const device::Kernel* devKernel = kit->second;
for (const auto& kit : kernels) {
const std::string& name = kit.first;
const device::Kernel* devKernel = kit.second;
Symbol& symbol = (*symbolTable_)[name];
if (!symbol.setDeviceKernel(device, devKernel)) {
@@ -538,9 +529,8 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
// Create a string with all kernel names from the program
if (kernelNames_.length() == 0) {
amd::Program::symbols_t::const_iterator it;
for (it = symbols().begin(); it != symbols().end(); ++it) {
if (it != symbols().begin()) {
for (auto it = symbols().cbegin(); it != symbols().cend(); ++it) {
if (it != symbols().cbegin()) {
kernelNames_.append(1, ';');
}
kernelNames_.append(it->first.c_str());
@@ -555,12 +545,10 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
}
void Program::clear() {
deviceprograms_t::iterator sit;
// Destroy old programs if we have any
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
for (const auto& it : devicePrograms_) {
// Destroy device program
delete sit->second;
delete it.second;
}
devicePrograms_.clear();
@@ -631,13 +619,13 @@ bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func, b
const device::Kernel* Symbol::getDeviceKernel(const Device& device, bool noAlias) const {
const devicekernels_t* devKernels = (noAlias) ? &deviceKernels_ : &devKernelsNoOpt_;
devicekernels_t::const_iterator itEnd = devKernels->end();
devicekernels_t::const_iterator it = devKernels->find(&device);
const auto itEnd = devKernels->cend();
auto it = devKernels->find(&device);
if (it != itEnd) {
return it->second;
}
for (it = devKernels->begin(); it != itEnd; ++it) {
for (it = devKernels->cbegin(); it != itEnd; ++it) {
if (it->first->isAncestor(&device)) {
return it->second;
}
+4 -4
ファイルの表示
@@ -35,7 +35,7 @@ namespace amd {
//! A kernel function symbol
class Symbol : public HeapObject {
public:
typedef std::map<const Device*, const device::Kernel*> devicekernels_t;
typedef std::unordered_map<const Device*, const device::Kernel*> devicekernels_t;
private:
devicekernels_t deviceKernels_; //! All device kernels objects.
@@ -68,9 +68,9 @@ class Program : public RuntimeObject {
public:
typedef std::pair<uint8_t*, size_t> binary_t;
typedef std::set<Device const*> devicelist_t;
typedef std::map<Device const*, binary_t> devicebinary_t;
typedef std::map<Device const*, device::Program*> deviceprograms_t;
typedef std::map<std::string, Symbol> symbols_t;
typedef std::unordered_map<Device const*, binary_t> devicebinary_t;
typedef std::unordered_map<Device const*, device::Program*> deviceprograms_t;
typedef std::unordered_map<std::string, Symbol> symbols_t;
enum Language {
Binary = 0,
+1 -1
ファイルの表示
@@ -14,7 +14,7 @@ namespace amd {
//! Abstraction layer sampler class
class Sampler : public RuntimeObject {
public:
typedef std::map<Device const*, device::Sampler*> DeviceSamplers;
typedef std::unordered_map<Device const*, device::Sampler*> DeviceSamplers;
//! \note the sampler states must match the compiler's defines.
//! See amd_ocl_sys_predef.c
+4 -4
ファイルの表示
@@ -5,7 +5,7 @@
#include "top.hpp"
#include "utils/flags.hpp"
#include <map>
#include <unordered_map>
#include <string>
#include <cstdlib>
#include <cstring>
@@ -75,7 +75,7 @@ void Flag::tearDown() {
}
bool Flag::init() {
typedef std::map<std::string, const char*> vars_type;
typedef std::unordered_map<std::string, const char*> vars_type;
vars_type vars;
#ifdef _WIN32
@@ -116,8 +116,8 @@ bool Flag::init() {
for (size_t i = 0; i < numFlags_; ++i) {
Flag& flag = flags_[i];
vars_type::iterator it = vars.find(flag.name_);
if (it != vars.end()) {
const auto it = vars.find(flag.name_);
if (it != vars.cend()) {
flag.setValue(it->second);
}
}