From 8f7df519dacc8d6112c4db44a604c3a34209b590 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Apr 2018 18:00:17 -0400 Subject: [PATCH] P4 to Git Change 1536925 by vsytchen@vsytchen-ocl-win10 on 2018/04/04 17:20:38 SWDEV-79445 - OCL generic changes and code clean-up 1. This change replaces the use of std::map with std::unordered_map to improve lookup/insert time. 2. Replace the use of std::make_pair and std::pair constructor with uniform initialization for cleaner code. 3. Replace the use of std::Container::iterator type with the auto keyword for cleaner code. 4. Use range based for loops where needed. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14517/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#57 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#72 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#216 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#297 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#158 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#587 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#322 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#237 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#242 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#415 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#143 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#79 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#60 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#84 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#46 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/CMakeLists.txt#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#6 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#89 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#129 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#102 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#91 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#9 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#17 edit [ROCm/clr commit: d09ca72f74a0fc392ae9c6e80caf82db2a184658] --- .../clr/rocclr/runtime/device/appprofile.hpp | 4 +- .../rocclr/runtime/device/cpu/cpuprogram.cpp | 9 +- .../rocclr/runtime/device/cpu/cpuvirtual.cpp | 14 +-- projects/clr/rocclr/runtime/device/device.cpp | 32 +++--- projects/clr/rocclr/runtime/device/device.hpp | 13 +-- .../runtime/device/gpu/gpuappprofile.cpp | 6 +- .../rocclr/runtime/device/gpu/gpubinary.cpp | 34 +++--- .../rocclr/runtime/device/gpu/gpucompiler.cpp | 10 +- .../rocclr/runtime/device/gpu/gpudevice.cpp | 2 +- .../rocclr/runtime/device/gpu/gpukernel.cpp | 17 ++- .../rocclr/runtime/device/gpu/gpuprintf.cpp | 8 +- .../rocclr/runtime/device/gpu/gpuprogram.cpp | 23 ++-- .../rocclr/runtime/device/gpu/gpuprogram.hpp | 4 +- .../rocclr/runtime/device/gpu/gpuresource.cpp | 2 +- .../rocclr/runtime/device/gpu/gpuvirtual.cpp | 41 ++++--- .../rocclr/runtime/device/gpu/gpuvirtual.hpp | 4 +- .../runtime/device/pal/palappprofile.cpp | 6 +- .../rocclr/runtime/device/pal/palcompiler.cpp | 14 +-- .../rocclr/runtime/device/pal/paldevice.cpp | 2 +- .../rocclr/runtime/device/pal/palprintf.cpp | 7 +- .../rocclr/runtime/device/pal/palprogram.cpp | 9 +- .../rocclr/runtime/device/pal/palresource.cpp | 4 +- .../rocclr/runtime/device/pal/palvirtual.cpp | 23 ++-- .../rocclr/runtime/device/pal/palvirtual.hpp | 2 +- .../rocclr/runtime/device/rocm/CMakeLists.txt | 8 +- .../runtime/device/rocm/pro/prodevice.cpp | 3 +- .../runtime/device/rocm/pro/prodevice.hpp | 4 +- .../rocclr/runtime/device/rocm/rocbinary.hpp | 2 +- .../runtime/device/rocm/roccompiler.cpp | 15 +-- .../runtime/device/rocm/roccounters.cpp | 11 +- .../rocclr/runtime/device/rocm/rocprintf.cpp | 5 +- .../rocclr/runtime/device/rocm/rocprogram.cpp | 5 +- .../clr/rocclr/runtime/platform/command.cpp | 33 +++--- .../clr/rocclr/runtime/platform/command.hpp | 41 +++---- .../rocclr/runtime/platform/commandqueue.cpp | 7 +- 
.../clr/rocclr/runtime/platform/context.cpp | 19 ++-- .../clr/rocclr/runtime/platform/context.hpp | 6 +- .../clr/rocclr/runtime/platform/memory.hpp | 3 +- .../clr/rocclr/runtime/platform/perfctr.hpp | 2 +- .../clr/rocclr/runtime/platform/program.cpp | 104 ++++++++---------- .../clr/rocclr/runtime/platform/program.hpp | 8 +- .../clr/rocclr/runtime/platform/sampler.hpp | 2 +- projects/clr/rocclr/runtime/utils/flags.cpp | 8 +- 43 files changed, 254 insertions(+), 322 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/appprofile.hpp b/projects/clr/rocclr/runtime/device/appprofile.hpp index fb651b3dbc..7707a85fcb 100644 --- a/projects/clr/rocclr/runtime/device/appprofile.hpp +++ b/projects/clr/rocclr/runtime/device/appprofile.hpp @@ -4,7 +4,7 @@ #ifndef APPPROFILE_HPP_ #define APPPROFILE_HPP_ -#include +#include #include namespace amd { @@ -34,7 +34,7 @@ class AppProfile { void* data_; //!< Pointer to the data }; - typedef std::map DataMap; + typedef std::unordered_map DataMap; DataMap propertyDataMap_; std::string appFileName_; // without extension diff --git a/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp b/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp index 436b6ed956..5f52cdf574 100644 --- a/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp +++ b/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp @@ -702,9 +702,8 @@ bool Program::compileImpl(const std::string& sourceCode, std::string headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if (amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); @@ -1028,8 +1027,8 @@ bool Program::linkImpl(const std::vector& inputPrograms, #if 
defined(WITH_ONLINE_COMPILER) std::vector llvmBinaries(inputPrograms.size()); std::vector elfSectionType(inputPrograms.size()); - std::vector::const_iterator it = inputPrograms.begin(); - std::vector::const_iterator itEnd = inputPrograms.end(); + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); for (size_t i = 0; it != itEnd; ++it, ++i) { Program* program = (Program*)*it; diff --git a/projects/clr/rocclr/runtime/device/cpu/cpuvirtual.cpp b/projects/clr/rocclr/runtime/device/cpu/cpuvirtual.cpp index 15ab049177..806cee2ed2 100644 --- a/projects/clr/rocclr/runtime/device/cpu/cpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/cpu/cpuvirtual.cpp @@ -430,10 +430,9 @@ void VirtualCPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd) { //! Go through ext objects by one and call member function to execute //! a sequence of external graphics API commands for each external object - for (std::vector::const_iterator itr = cmd.getMemList().begin(); - itr != cmd.getMemList().end(); itr++) { - if (*itr) { - bError |= !((*itr)->mapExtObjectInCQThread()); + for (const auto& it : cmd.getMemList()) { + if (it) { + bError |= !(it->mapExtObjectInCQThread()); } } if (bError) { @@ -453,10 +452,9 @@ void VirtualCPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& cmd) { bool bError = false; - for (std::vector::const_iterator itr = cmd.getMemList().begin(); - itr != cmd.getMemList().end(); itr++) { - if (*itr) { - bError |= !((*itr)->unmapExtObjectInCQThread()); + for (const auto& it : cmd.getMemList()) { + if (it) { + bError |= !(it->unmapExtObjectInCQThread()); } } if (bError) { diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp index a9b3a65f4a..84657dd532 100644 --- a/projects/clr/rocclr/runtime/device/device.cpp +++ b/projects/clr/rocclr/runtime/device/device.cpp @@ -72,7 +72,7 @@ size_t SvmManager::size() { void SvmManager::AddSvmBuffer(const void* k, amd::Memory* v) { 
amd::ScopedLock lock(AllocatedLock_); - svmBufferMap_.insert(std::pair(reinterpret_cast(k), v)); + svmBufferMap_.insert({reinterpret_cast(k), v}); } void SvmManager::RemoveSvmBuffer(const void* k) { @@ -83,7 +83,7 @@ void SvmManager::RemoveSvmBuffer(const void* k) { amd::Memory* SvmManager::FindSvmBuffer(const void* k) { amd::ScopedLock lock(AllocatedLock_); uintptr_t key = reinterpret_cast(k); - std::map::iterator it = svmBufferMap_.upper_bound(key); + auto it = svmBufferMap_.upper_bound(key); if (it == svmBufferMap_.begin()) { return NULL; } @@ -320,8 +320,7 @@ device::Memory* Device::findMemoryFromVA(const void* ptr, size_t* offset) const amd::ScopedLock lk(*vaCacheAccess_); uintptr_t key = reinterpret_cast(ptr); - std::map::iterator it = - vaCacheMap_->upper_bound(reinterpret_cast(ptr)); + auto it = vaCacheMap_->upper_bound(reinterpret_cast(ptr)); if (it == vaCacheMap_->begin()) { return nullptr; } @@ -352,10 +351,10 @@ std::vector Device::getDevices(cl_device_type type, bool offlineDevices } // Create the list of available devices - for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) { + for (const auto& it : *devices_) { // Check if the device type is matched - if ((*it)->IsTypeMatching(type, offlineDevices)) { - result.push_back(*it); + if (it->IsTypeMatching(type, offlineDevices)) { + result.push_back(it); } } @@ -369,9 +368,9 @@ size_t Device::numDevices(cl_device_type type, bool offlineDevices) { return 0; } - for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) { + for (const auto& it : *devices_) { // Check if the device type is matched - if ((*it)->IsTypeMatching(type, offlineDevices)) { + if (it->IsTypeMatching(type, offlineDevices)) { ++result; } } @@ -393,7 +392,7 @@ bool Device::getDeviceIDs(cl_device_type deviceType, cl_uint numEntries, cl_devi return false; } - std::vector::iterator it = ret.begin(); + auto it = ret.cbegin(); cl_uint count = std::min(numEntries, (cl_uint)ret.size()); while (count--) { @@ 
-707,7 +706,7 @@ void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin, // Insert into the map if it's the first region if (++pInfo->count_ == 1) { - writeMapInfo_.insert(std::pair(mapAddress, info)); + writeMapInfo_.insert({mapAddress, info}); } } @@ -729,9 +728,8 @@ Program::~Program() { clear(); } void Program::clear() { // Destroy all device kernels - kernels_t::const_iterator it; - for (it = kernels_.begin(); it != kernels_.end(); ++it) { - delete it->second; + for (const auto& it : kernels_) { + delete it.second; } kernels_.clear(); } @@ -1035,8 +1033,8 @@ cl_int Program::build(const std::string& sourceCode, const char* origOptions, bool Program::getCompileOptionsAtLinking(const std::vector& inputPrograms, const amd::option::Options* linkOptions) { amd::option::Options compileOptions; - std::vector::const_iterator it = inputPrograms.begin(); - std::vector::const_iterator itEnd = inputPrograms.end(); + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); for (size_t i = 0; it != itEnd; ++it, ++i) { Program* program = *it; @@ -1473,7 +1471,7 @@ bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) { return true; } -Program::binary_t ClBinary::data() const { return std::make_pair(binary_, size_); } +Program::binary_t ClBinary::data() const { return {binary_, size_}; } bool ClBinary::setBinary(const char* theBinary, size_t theBinarySize, bool allocated) { release(); diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index b1eab73d6a..6671639b65 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -828,9 +828,9 @@ class Memory : public amd::HeapObject { //! NB, the map data below is for an API-level map (from clEnqueueMapBuffer), //! not a physical map. When a memory object does not use USE_HOST_PTR we //! can use a remote resource and DMA, avoiding the additional CPU memcpy. 
- amd::Memory* mapMemory_; //!< Memory used as map target buffer - volatile size_t indirectMapCount_; //!< Number of maps - std::map writeMapInfo_; //!< Saved write map info for partial unmap + amd::Memory* mapMemory_; //!< Memory used as map target buffer + volatile size_t indirectMapCount_; //!< Number of maps + std::unordered_map writeMapInfo_; //!< Saved write map info for partial unmap //! Increment map count void incIndMapCount() { ++indirectMapCount_; } @@ -1017,7 +1017,7 @@ class Kernel : public amd::HeapObject { class Program : public amd::HeapObject { public: typedef std::pair binary_t; - typedef std::map kernels_t; + typedef std::unordered_map kernels_t; // type of the program typedef enum { TYPE_NONE = 0, // uncompiled @@ -1347,14 +1347,14 @@ class ClBinary : public amd::HeapObject { inline const Program::binary_t Program::binary() const { if (clBinary() == NULL) { - return std::make_pair((const void*)0, 0); + return {(const void*)0, 0}; } return clBinary()->data(); } inline Program::binary_t Program::binary() { if (clBinary() == NULL) { - return std::make_pair((const void*)0, 0); + return {(const void*)0, 0}; } return clBinary()->data(); } @@ -1750,7 +1750,6 @@ class Device : public RuntimeObject { static AppProfile* rocAppProfile_; #endif - typedef std::vector::iterator device_iterator; static std::vector* devices_; //!< All known devices Device* parent_; //!< This device's parent diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp index 5225788e65..0b1df8d6e2 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp @@ -11,10 +11,8 @@ namespace gpu { AppProfile::AppProfile() : amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) { - propertyDataMap_.insert(DataMap::value_type( - "HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_))); + 
propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)}); - propertyDataMap_.insert( - DataMap::value_type("OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_))); + propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)}); } } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpubinary.cpp b/projects/clr/rocclr/runtime/device/gpu/gpubinary.cpp index 267aa9f255..873091f657 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpubinary.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpubinary.cpp @@ -73,16 +73,14 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { functionNameMap[] maps from a function name (linkage name in the generated code) to ElfSymbol_t, which is defined as above. */ - std::map functionNameMap; + std::unordered_map functionNameMap; // Keep all kernel ILs if -use-debugil is present (gpu debugging) - std::map kernelILs; + std::unordered_map kernelILs; ~TempWrapper() { - std::map::iterator I, IB = functionNameMap.begin(), - IE = functionNameMap.end(); - for (I = IB; I != IE; ++I) { - delete[](*I).second; + for (const auto& it : functionNameMap) { + delete[] it.second; } kernelILs.clear(); @@ -177,10 +175,8 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { } // Append all function metadata to debugIL - std::map::iterator I, IB = tempObj.functionNameMap.begin(), - IE = tempObj.functionNameMap.end(); - for (I = IB; I != IE; ++I) { - ElfSymbol_t* elfsymbol = (*I).second; + for (const auto& it : tempObj.functionNameMap) { + ElfSymbol_t* elfsymbol = it.second; if (elfsymbol == NULL) { // Not valid, skip continue; @@ -202,11 +198,9 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { } // Now, patch the IL from debugIL into functionNameMap[] - std::map::iterator KI, KIB = tempObj.kernelILs.begin(), - KIE = tempObj.kernelILs.end(); - for (KI = KIB; KI != KIE; ++KI) { - const 
std::string& kn = (*KI).first; - const std::string& ilstr = (*KI).second; + for (const auto& it : tempObj.kernelILs) { + const std::string& kn = it.first; + const std::string& ilstr = it.second; ElfSymbol_t* elfsymbol = tempObj.functionNameMap[kn]; if (elfsymbol == NULL) { @@ -225,10 +219,8 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { bool recompiled = false; bool hasKernels = false; - std::map::iterator I, IB = tempObj.functionNameMap.begin(), - IE = tempObj.functionNameMap.end(); - for (I = IB; I != IE; ++I) { - ElfSymbol_t* elfsymbol = (*I).second; + for (const auto& it : tempObj.functionNameMap) { + ElfSymbol_t* elfsymbol = it.second; if (elfsymbol == NULL) { // Not valid, skip continue; @@ -237,7 +229,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { // and the new binary is needed. if (saveAMDIL() && (elfsymbol->SymInfo[NDX_METADATA].size > 0)) { std::string fmetadata = "__OpenCL_"; - fmetadata.append((*I).first); + fmetadata.append(it.first); fmetadata.append("_fmetadata"); if (!elfOut()->addSymbol(amd::OclElf::RODATA, fmetadata.c_str(), @@ -250,7 +242,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { continue; } amd::OclElf::SymbolInfo* sinfo = &(elfsymbol->SymInfo[0]); - std::string FName = (*I).first; + std::string FName = it.first; // For this kernel, get the demangled kernel name, which is used to identify each kernel. 
const size_t name_sz = FName.size() - (sizeof(_kernel) - 1) - (sizeof(__OpenCL_) - 1); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp b/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp index 8485c2918c..77c04a4d8a 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp @@ -74,9 +74,8 @@ bool NullProgram::compileImpl(const std::string& src, std::string headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if (amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); @@ -355,9 +354,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode, std::string headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if (amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index 496f886397..dbc07c8bef 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -1152,7 +1152,7 @@ device::Program* Device::createProgram(amd::option::Options* options) { } //! Requested devices list as configured by the GPU_DEVICE_ORDINAL -typedef std::map requestedDevices_t; +typedef std::unordered_map requestedDevices_t; //! 
Parses the requested list of devices to be exposed to the user. static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) { diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp index f04e6c3a67..87c4ff0a7a 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp @@ -1349,10 +1349,9 @@ bool Kernel::bindGlobalHwCb(VirtualGPU& gpu, VirtualGPU::GslKernelDesc* desc) co // Bind HW constant buffers used for the global data store const Program::HwConstBuffers& gds = prog().glbHwCb(); - for (Program::HwConstBuffers::const_iterator it = gds.begin(); (it != gds.end() && result); - ++it) { - uint idx = it->first; - result = bindResource(gpu, *(it->second), idx, ConstantBuffer, idx); + for (const auto& it : gds) { + uint idx = it.first; + result = bindResource(gpu, *(it.second), idx, ConstantBuffer, idx); } return result; @@ -1535,16 +1534,16 @@ void Kernel::debug(VirtualGPU& gpu) const { } } const Program::HwConstBuffers& gds = prog().glbHwCb(); - for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) { - uint idx = it->first; + for (const auto& it : gds) { + uint idx = it.first; std::stringstream fileName; fileName << counter++ << "_kernel_" << name() << "_const" << idx << ".bin"; stubWrite.open(fileName.str().c_str(), (std::fstream::out | std::fstream::binary)); if (stubWrite.is_open()) { - address memory = reinterpret_cast
((it->second)->map(&gpu, Resource::ReadOnly)); + address memory = reinterpret_cast
(it.second->map(&gpu, Resource::ReadOnly)); // Check if we have OpenCL program - stubWrite.write(reinterpret_cast(memory), (it->second)->size()); - (it->second)->unmap(&gpu); + stubWrite.write(reinterpret_cast(memory), it.second->size()); + it.second->unmap(&gpu); stubWrite.close(); } } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp index 3ae7b7c479..39ddfa2f8a 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp @@ -543,7 +543,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) { // First DWORD = Offset to where next information is to // be written, initialized to 0 // Second DWORD = Number of bytes available for printf data - // = buffer size – 2*sizeof(uint32_t) + // = buffer size - 2*sizeof(uint32_t) const uint8_t initSize = 2 * sizeof(uint32_t); uint8_t sysMem[initSize]; memset(sysMem, 0, initSize); @@ -601,8 +601,6 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, return false; } - - std::vector::const_iterator ita; uint sb = 0; uint sbt = 0; @@ -614,8 +612,8 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, } const PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; sb += sizeof(uint32_t); - for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) { - sb += *ita; + for (const auto& it : info.arguments_) { + sb += it; } if (sbt + sb > bufSize) { diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp index 87d2fdae3b..a684034533 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp @@ -318,7 +318,7 @@ bool NullProgram::linkImpl(amd::option::Options* options) { std::string metadataStr; std::vector notCalled; std::vector called; - std::map macros; + std::unordered_map macros; size_t j; Kernel::InitData initData = {0}; @@ 
-464,8 +464,8 @@ bool NullProgram::linkImpl(const std::vector& inputPrograms, amd::option::Options* options, bool createLibrary) { std::vector llvmBinaries(inputPrograms.size()); std::vector elfSectionType(inputPrograms.size()); - std::vector::const_iterator it = inputPrograms.begin(); - std::vector::const_iterator itEnd = inputPrograms.end(); + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); for (size_t i = 0; it != itEnd; ++it, ++i) { NullProgram* program = (NullProgram*)*it; @@ -682,7 +682,7 @@ bool NullProgram::linkImpl(const std::vector& inputPrograms, std::string metadataStr; std::vector notCalled; std::vector called; - std::map macros; + std::unordered_map macros; size_t j; Kernel::InitData initData = {0}; @@ -1433,7 +1433,7 @@ NullKernel* NullProgram::createKernel(const std::string& name, const Kernel::Ini } // Invoked from ClBinary -bool NullProgram::getAllKernelILs(std::map& allKernelILs, +bool NullProgram::getAllKernelILs(std::unordered_map& allKernelILs, std::string& programIL, const char* ilKernelName) { llvm::CompUnit compunit(programIL); if (ilKernelName != NULL) { @@ -1471,8 +1471,8 @@ bool NullProgram::createBinary(amd::option::Options* options) { Program::~Program() { // Destroy the global HW constant buffers const Program::HwConstBuffers& gds = glbHwCb(); - for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) { - delete it->second; + for (const auto& it : gds) { + delete it.second; } // Destroy the global data store @@ -1634,8 +1634,8 @@ bool HSAILProgram::finiBuild(bool isBuildGood) { bool HSAILProgram::linkImpl(const std::vector& inputPrograms, amd::option::Options* options, bool createLibrary) { - std::vector::const_iterator it = inputPrograms.begin(); - std::vector::const_iterator itEnd = inputPrograms.end(); + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); acl_error errorCode; // For each program we need to extract the LLVMIR and create @@ -2037,13 
+2037,12 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) { } std::vector vKernels = splitSpaceSeparatedString(kernelNames); delete [] kernelNames; - std::vector::iterator it = vKernels.begin(); bool dynamicParallelism = false; aclMetadata md; md.numHiddenKernelArgs = 0; size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs); - for (it; it != vKernels.end(); ++it) { - std::string kernelName(*it); + for (const auto& it : vKernels) { + std::string kernelName(it); std::string openclKernelName = Kernel::openclMangledName(kernelName); errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS, openclKernelName.c_str(), &md.numHiddenKernelArgs, diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp index 1e4f10f5f2..893b97d996 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp @@ -256,7 +256,7 @@ class NullProgram : public device::Program { /*! Get all per-kernel IL from programIL, where programIL is the IL for the * whole compilation unit. */ - bool getAllKernelILs(std::map& allKernelILs, std::string& programIL, + bool getAllKernelILs(std::unordered_map& allKernelILs, std::string& programIL, const char* ilKernelName); protected: @@ -322,7 +322,7 @@ class Program : public NullProgram { size_t binarySize = 0 //!< the machine code size ); - typedef std::map HwConstBuffers; + typedef std::unordered_map HwConstBuffers; //! 
Global HW constant buffers const HwConstBuffers& glbHwCb() const { return constBufs_; } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp index 0c72ce097f..0ea9bedcd8 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp @@ -1744,7 +1744,7 @@ bool ResourceCache::addCalResource(Resource::CalResourceDesc* desc, GslResourceR memcpy(descCached, desc, sizeof(Resource::CalResourceDesc)); // Add the current resource to the cache - resCache_.push_front(std::make_pair(descCached, ref)); + resCache_.push_front({descCached, ref}); cacheSize_ += size; result = true; } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index 0c0bf44400..5abdeaccf4 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -545,9 +545,9 @@ VirtualGPU::~VirtualGPU() { uint i; // Destroy all kernels - for (GslKernels::const_iterator it = gslKernels_.begin(); it != gslKernels_.end(); ++it) { - if (it->first != 0) { - freeKernelDesc(it->second); + for (const auto& it : gslKernels_) { + if (it.first != 0) { + freeKernelDesc(it.second); } } gslKernels_.clear(); @@ -1365,10 +1365,9 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand& vcmd) { profilingBegin(vcmd, true); - std::vector::const_iterator itr; - for (itr = vcmd.memObjects().begin(); itr != vcmd.memObjects().end(); ++itr) { + for (const auto& it : vcmd.memObjects()) { // Find device memory - gpu::Memory* memory = dev().getGpuMemory(*itr); + gpu::Memory* memory = dev().getGpuMemory(it); if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) { memory->mgpuCacheWriteBack(); @@ -2016,7 +2015,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) { // Loop through all outstanding command batches while (!cbList_.empty()) { - 
CommandBatchList::const_iterator it = cbList_.begin(); + const auto it = cbList_.cbegin(); // Wait for completion foundEvent = awaitCompletion(*it, vcmd.waitingEvent()); // Release a command batch @@ -2210,8 +2209,8 @@ void VirtualGPU::submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand& const size_t memObjSize = cmd.getMemoryObjectSize(); const std::vector& memObj = cmd.getMemList(); size_t se = 0; - for (std::vector::const_iterator itMemObj = memObj.begin(); - itMemObj != memObj.end(); ++itMemObj, ++se) { + for (auto itMemObj = memObj.cbegin(); + itMemObj != memObj.cend(); ++itMemObj, ++se) { // Find GSL Mem Object gslMemObject gslMemObj = dev().getGpuMemory(*itMemObj)->gslResource(); @@ -2297,15 +2296,14 @@ void VirtualGPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& vcmd) { profilingBegin(vcmd); - for (std::vector::const_iterator it = vcmd.getMemList().begin(); - it != vcmd.getMemList().end(); ++it) { + for (const auto& it : vcmd.getMemList()) { // amd::Memory object should never be NULL - assert(*it && "Memory object for interop is NULL"); - gpu::Memory* memory = dev().getGpuMemory(*it); + assert(it && "Memory object for interop is NULL"); + gpu::Memory* memory = dev().getGpuMemory(it); // If resource is a shared copy of original resource, then // runtime needs to copy data from original resource - (*it)->getInteropObj()->copyOrigToShared(); + it->getInteropObj()->copyOrigToShared(); // Check if OpenCL has direct access to the interop memory if (memory->interopType() == Memory::InteropDirectAccess) { @@ -2336,11 +2334,10 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) { profilingBegin(vcmd); - for (std::vector::const_iterator it = vcmd.getMemList().begin(); - it != vcmd.getMemList().end(); ++it) { + for (const auto& it : vcmd.getMemList()) { // amd::Memory object should never be NULL - assert(*it && "Memory object for interop is NULL"); - gpu::Memory* memory = dev().getGpuMemory(*it); + assert(it && "Memory 
object for interop is NULL"); + gpu::Memory* memory = dev().getGpuMemory(it); // Check if we can use HW interop if (memory->interopType() == Memory::InteropHwEmulation) { @@ -2362,7 +2359,7 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) { // If resource is a shared copy of original resource, then // runtime needs to copy data back to original resource - (*it)->getInteropObj()->copySharedToOrig(); + it->getInteropObj()->copySharedToOrig(); } profilingEnd(vcmd); @@ -2513,7 +2510,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) { wait |= state_.forceWait_; // Loop through all outstanding command batches while (!cbList_.empty()) { - CommandBatchList::const_iterator it = cbList_.begin(); + const auto it = cbList_.cbegin(); // Check if command batch finished without a wait bool finished = true; for (uint i = 0; i < AllEngines; ++i) { @@ -2537,8 +2534,8 @@ void VirtualGPU::flush(amd::Command* list, bool wait) { void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); } void VirtualGPU::releaseMemObjects(bool scratch) { - for (GpuEvents::const_iterator it = gpuEvents_.begin(); it != gpuEvents_.end(); ++it) { - GpuEvent event = it->second; + for (const auto& it : gpuEvents_) { + GpuEvent event = it.second; waitForEvent(&event); } // Unbind all resources.So the queue won't have any bound mem objects diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp index cbf78a1020..4a8c912e4b 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp @@ -380,8 +380,8 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext { ) const; private: - typedef std::map GslKernels; - typedef std::map GpuEvents; + typedef std::unordered_map GslKernels; + typedef std::unordered_map GpuEvents; //! 
Finds total amount of necessary iterations inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes diff --git a/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp b/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp index 84bc9e4722..e703204719 100644 --- a/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp @@ -11,10 +11,8 @@ namespace pal { AppProfile::AppProfile() : amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) { - propertyDataMap_.insert(DataMap::value_type( - "HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_))); + propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)}); - propertyDataMap_.insert( - DataMap::value_type("OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_))); + propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)}); } } diff --git a/projects/clr/rocclr/runtime/device/pal/palcompiler.cpp b/projects/clr/rocclr/runtime/device/pal/palcompiler.cpp index 7695387278..a0ef0698d6 100644 --- a/projects/clr/rocclr/runtime/device/pal/palcompiler.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palcompiler.cpp @@ -65,9 +65,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode, std::string headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if (amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); @@ -282,9 +281,8 @@ bool LightningProgram::compileImpl(const std::string& sourceCode, std::string 
headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if (amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); @@ -343,10 +341,10 @@ bool LightningProgram::compileImpl(const std::string& sourceCode, case 100: case 110: case 120: - hdr = std::make_pair(opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size); + hdr = {opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size}; break; case 200: - hdr = std::make_pair(opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size); + hdr = {opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size}; break; default: buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n"; diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index b7234df983..57a1a2131a 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -1081,7 +1081,7 @@ device::Program* Device::createProgram(amd::option::Options* options) { } //! Requested devices list as configured by the GPU_DEVICE_ORDINAL -typedef std::map requestedDevices_t; +typedef std::unordered_map requestedDevices_t; //! Parses the requested list of devices to be exposed to the user. 
static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) { diff --git a/projects/clr/rocclr/runtime/device/pal/palprintf.cpp b/projects/clr/rocclr/runtime/device/pal/palprintf.cpp index a68b23fea9..0f0ad1c7a7 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprintf.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palprintf.cpp @@ -539,7 +539,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) { // First DWORD = Offset to where next information is to // be written, initialized to 0 // Second DWORD = Number of bytes available for printf data - // = buffer size – 2*sizeof(uint32_t) + // = buffer size – 2*sizeof(uint32_t) const uint8_t initSize = 2 * sizeof(uint32_t); uint8_t sysMem[initSize]; memset(sysMem, 0, initSize); @@ -597,7 +597,6 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, return false; } - std::vector::const_iterator ita; uint sb = 0; uint sbt = 0; @@ -609,8 +608,8 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, } const PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; sb += sizeof(uint32_t); - for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) { - sb += *ita; + for (const auto& it : info.arguments_) { + sb += it; } if (sbt + sb > bufSize) { diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp index 27a31bf937..c00aa3bf14 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp @@ -228,8 +228,8 @@ bool HSAILProgram::linkImpl(const std::vector& inputPrograms, assert(!"Should not reach here"); return false; #else // !defined(WITH_LIGHTNING_COMPILER) - std::vector::const_iterator it = inputPrograms.begin(); - std::vector::const_iterator itEnd = inputPrograms.end(); + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); acl_error errorCode; // For each program we need to extract the LLVMIR and create @@
-656,10 +656,9 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) { } std::vector vKernels = splitSpaceSeparatedString(kernelNames); delete [] kernelNames; - std::vector::iterator it = vKernels.begin(); bool dynamicParallelism = false; - for (it; it != vKernels.end(); ++it) { - std::string kernelName(*it); + for (const auto& it : vKernels) { + std::string kernelName(it); std::string openclKernelName = device::Kernel::openclMangledName(kernelName); HSAILKernel* aKernel = diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp index 05bf9c7888..a4d88cc5de 100644 --- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp @@ -1133,7 +1133,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) { Pal::GpuMemoryCreateInfo createInfo = {}; createInfo.size = desc().width_ * elementSize_; createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment); - createInfo.alignment = MaxGpuAlignment; + createInfo.alignment = desc().scratch_ ? 
64*Ki : MaxGpuAlignment; createInfo.vaRange = Pal::VaRange::Default; createInfo.priority = Pal::GpuMemPriority::Normal; @@ -1970,7 +1970,7 @@ bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, amd::ScopedLock l(&lockCacheOps_); // Add the current resource to the cache - resCache_.push_front(std::make_pair(descCached, ref)); + resCache_.push_front({descCached, ref}); ref->gpu_ = nullptr; cacheSize_ += size; result = true; diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index a10737b30b..ea7adc98d3 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -1802,10 +1802,9 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand& vcmd) { profilingBegin(vcmd, true); - std::vector::const_iterator itr; - for (itr = vcmd.memObjects().begin(); itr != vcmd.memObjects().end(); ++itr) { + for (const auto& it : vcmd.memObjects()) { // Find device memory - pal::Memory* memory = dev().getGpuMemory(*itr); + pal::Memory* memory = dev().getGpuMemory(it); if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) { memory->mgpuCacheWriteBack(); @@ -2478,15 +2477,14 @@ void VirtualGPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& vcmd) { profilingBegin(vcmd); - for (std::vector::const_iterator it = vcmd.getMemList().begin(); - it != vcmd.getMemList().end(); ++it) { + for (const auto& it : vcmd.getMemList()) { // amd::Memory object should never be nullptr - assert(*it && "Memory object for interop is nullptr"); - pal::Memory* memory = dev().getGpuMemory(*it); + assert(it && "Memory object for interop is nullptr"); + pal::Memory* memory = dev().getGpuMemory(it); // If resource is a shared copy of original resource, then // runtime needs to copy data from original resource - (*it)->getInteropObj()->copyOrigToShared(); + it->getInteropObj()->copyOrigToShared(); // Check if OpenCL has direct access to the 
interop memory if (memory->interopType() == Memory::InteropDirectAccess) { @@ -2517,11 +2515,10 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) { profilingBegin(vcmd); - for (std::vector::const_iterator it = vcmd.getMemList().begin(); - it != vcmd.getMemList().end(); ++it) { + for (const auto& it : vcmd.getMemList()) { // amd::Memory object should never be nullptr - assert(*it && "Memory object for interop is nullptr"); - pal::Memory* memory = dev().getGpuMemory(*it); + assert(it && "Memory object for interop is nullptr"); + pal::Memory* memory = dev().getGpuMemory(it); // Check if we can use HW interop if (memory->interopType() == Memory::InteropHwEmulation) { @@ -2543,7 +2540,7 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) { // If resource is a shared copy of original resource, then // runtime needs to copy data back to original resource - (*it)->getInteropObj()->copySharedToOrig(); + it->getInteropObj()->copySharedToOrig(); } profilingEnd(vcmd); diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp index 2bfe742bb6..801a8750d6 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp @@ -163,7 +163,7 @@ class VirtualGPU : public device::VirtualDevice { uint cmdBufIdCurrent_; //!< Current global command buffer ID uint cmbBufIdRetired_; //!< The last retired command buffer ID uint cmdCnt_; //!< Counter of commands - std::map memReferences_; + std::unordered_map memReferences_; Util::VirtualLinearAllocator vlAlloc_; std::vector palMemRefs_; std::vector palMems_; diff --git a/projects/clr/rocclr/runtime/device/rocm/CMakeLists.txt b/projects/clr/rocclr/runtime/device/rocm/CMakeLists.txt index b9fb983d06..c05fa16674 100644 --- a/projects/clr/rocclr/runtime/device/rocm/CMakeLists.txt +++ b/projects/clr/rocclr/runtime/device/rocm/CMakeLists.txt @@ -51,19 +51,19 @@ 
foreach(AMDGCN_LIB_TARGET ${AMDGCN_LIB_TARGETS}) if (${AMDGCN_LIB_TARGET} MATCHES "^oclc_isa_version_[0-9]+_lib$") string(REGEX REPLACE "^oclc_isa_version_([0-9]+)_lib$" "\\1" gfxip ${AMDGCN_LIB_TARGET}) file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc - "case ${gfxip}: return std::make_pair(oclc_isa_version_${gfxip}_amdgcn, oclc_isa_version_${gfxip}_amdgcn_size); break;\n") + "case ${gfxip}: return {oclc_isa_version_${gfxip}_amdgcn, oclc_isa_version_${gfxip}_amdgcn_size}; break;\n") endif() endforeach() file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc - "default: return std::make_pair((const void*)0,(size_t)0);\n}\n}\n") + "default: return {(const void*)0,(size_t)0};\n}\n}\n") foreach(AMDGCN_LIB_TARGET ${AMDGCN_LIB_TARGETS}) if (${AMDGCN_LIB_TARGET} MATCHES "oclc_(.*)_on_lib") string(REGEX REPLACE "oclc_(.*)_on_lib" "\\1" function ${AMDGCN_LIB_TARGET}) file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc - "static inline std::pair get_oclc_${function}(bool on)\n{ return std::make_pair(" + "static inline std::pair get_oclc_${function}(bool on)\n{ return {" "(const char*)(on ? oclc_${function}_on_amdgcn : oclc_${function}_off_amdgcn)," - "on ? oclc_${function}_on_amdgcn_size : oclc_${function}_off_amdgcn_size);}\n") + "on ? 
oclc_${function}_on_amdgcn_size : oclc_${function}_off_amdgcn_size};}\n") endif() endforeach() diff --git a/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.cpp b/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.cpp index 9d44cdeee3..619e8b19ee 100644 --- a/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.cpp @@ -183,8 +183,7 @@ void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) flags, &buf_size, &ptr, nullptr, nullptr)) { // Ask GPUPro driver to provide CPU access to allocation if (0 == Funcs().AmdgpuBoCpuMap(buf_handle, host_ptr)) { - allocs_.insert(std::pair>( - ptr, std::pair(buf_handle, shared_handle))); + allocs_.insert({ptr, {buf_handle, shared_handle}}); } else { hsa_amd_interop_unmap_buffer(ptr); diff --git a/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.hpp b/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.hpp index a4a18fec4f..06c66fb2da 100644 --- a/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/pro/prodevice.hpp @@ -9,7 +9,7 @@ #include "profuncs.hpp" #include "prodriver.hpp" #include "thread/monitor.hpp" -#include +#include /*! 
\addtogroup HSA * @{ @@ -53,7 +53,7 @@ private: amdgpu_device_handle dev_handle_; //!< AMD gpu device handle amdgpu_gpu_info gpu_info_; //!< GPU info structure amdgpu_heap_info heap_info_; //!< Information about memory - mutable std::map> allocs_; //!< Alloced memory mapping + mutable std::unordered_map> allocs_; //!< Alloced memory mapping amd::Monitor* alloc_ops_; //!< Serializes memory allocations/destructions }; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocbinary.hpp b/projects/clr/rocclr/runtime/device/rocm/rocbinary.hpp index 757bf797bc..4ec31d676f 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocbinary.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocbinary.hpp @@ -10,7 +10,7 @@ namespace roc { -typedef std::map NameKernelMap; +typedef std::unordered_map NameKernelMap; class ClBinary : public device::ClBinary { public: diff --git a/projects/clr/rocclr/runtime/device/rocm/roccompiler.cpp b/projects/clr/rocclr/runtime/device/rocm/roccompiler.cpp index cc0335cfee..086d972693 100644 --- a/projects/clr/rocclr/runtime/device/rocm/roccompiler.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/roccompiler.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include "os/os.hpp" #include "rocdevice.hpp" @@ -80,9 +79,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode, std::string headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if (amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); @@ -249,9 +247,8 @@ bool LightningProgram::compileImpl(const std::string& sourceCode, std::string headerIncludeName(headerIncludeNames[i]); // replace / in path with current os's file separator if 
(amd::Os::fileSeparator() != '/') { - for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end(); - it != end; ++it) { - if (*it == '/') *it = amd::Os::fileSeparator(); + for (auto& it : headerIncludeName) { + if (it == '/') it = amd::Os::fileSeparator(); } } size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); @@ -309,10 +306,10 @@ bool LightningProgram::compileImpl(const std::string& sourceCode, case 100: case 110: case 120: - hdr = std::make_pair(opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size); + hdr = {opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size}; break; case 200: - hdr = std::make_pair(opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size); + hdr = {opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size}; break; default: buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n"; diff --git a/projects/clr/rocclr/runtime/device/rocm/roccounters.cpp b/projects/clr/rocclr/runtime/device/rocm/roccounters.cpp index eea3dba3a1..f5233e0445 100644 --- a/projects/clr/rocclr/runtime/device/rocm/roccounters.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/roccounters.cpp @@ -336,12 +336,11 @@ uint64_t PerfCounter::getInfo(uint64_t infoType) const { &data); uint64_t result = 0; - std::vector::iterator it; - for (it = data.begin(); it != data.end(); ++it) { - if (it->pmc_data.event.block_name == event_.block_name && - it->pmc_data.event.block_index == event_.block_index && - it->pmc_data.event.counter_id == event_.counter_id) { - result += it->pmc_data.result; + for (const auto& it : data) { + if (it.pmc_data.event.block_name == event_.block_name && + it.pmc_data.event.block_index == event_.block_index && + it.pmc_data.event.counter_id == event_.counter_id) { + result += it.pmc_data.result; } } return result; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp b/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp index 82a86d8059..2cd22a3a76 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp +++ 
b/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp @@ -415,7 +415,6 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, return false; } - std::vector::const_iterator ita; uint sb = 0; uint sbt = 0; @@ -427,8 +426,8 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, } const PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; sb += sizeof(uint32_t); - for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) { - sb += *ita; + for (const auto& ita : info.arguments_) { + sb += ita; } size_t idx = 1; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp index 410e02febd..0cbea64166 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp @@ -24,7 +24,6 @@ #include #include #include -#include namespace roc { @@ -535,8 +534,8 @@ aclType HSAILProgram::getCompilationStagesFromBinary(std::vector& compl bool HSAILProgram::linkImpl(const std::vector& inputPrograms, amd::option::Options* options, bool createLibrary) { - std::vector::const_iterator it = inputPrograms.begin(); - std::vector::const_iterator itEnd = inputPrograms.end(); + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); acl_error errorCode; // For each program we need to extract the LLVMIR and create diff --git a/projects/clr/rocclr/runtime/platform/command.cpp b/projects/clr/rocclr/runtime/platform/command.cpp index 308183d07c..2363ce3853 100644 --- a/projects/clr/rocclr/runtime/platform/command.cpp +++ b/projects/clr/rocclr/runtime/platform/command.cpp @@ -370,11 +370,10 @@ void UnmapMemoryCommand::releaseResources() { bool MigrateMemObjectsCommand::validateMemory() { if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) { - std::vector::const_iterator itr; - for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { - device::Memory* mem = (*itr)->getDeviceMemory(queue()->device()); + for (const 
auto& it : memObjects_) { + device::Memory* mem = it->getDeviceMemory(queue()->device()); if (NULL == mem) { - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize()); + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize()); return false; } } @@ -434,11 +433,10 @@ cl_int NDRangeKernelCommand::validateMemory() { bool ExtObjectsCommand::validateMemory() { bool retVal = true; if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) { - for (std::vector::const_iterator itr = memObjects_.begin(); - itr != memObjects_.end(); itr++) { - device::Memory* mem = (*itr)->getDeviceMemory(queue()->device()); + for (const auto& it : memObjects_) { + device::Memory* mem = it->getDeviceMemory(queue()->device()); if (NULL == mem) { - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize()); + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize()); return false; } retVal = processGLResource(mem); @@ -457,11 +455,10 @@ bool ReleaseExtObjectsCommand::processGLResource(device::Memory* mem) { bool MakeBuffersResidentCommand::validateMemory() { if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) { - for (std::vector::const_iterator itr = memObjects_.begin(); - itr != memObjects_.end(); itr++) { - device::Memory* mem = (*itr)->getDeviceMemory(queue()->device()); + for (const auto& it : memObjects_) { + device::Memory* mem = it->getDeviceMemory(queue()->device()); if (NULL == mem) { - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize()); + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize()); return false; } } @@ -471,16 +468,14 @@ bool MakeBuffersResidentCommand::validateMemory() { } bool ThreadTraceMemObjectsCommand::validateMemory() { if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) { - for (std::vector::const_iterator itr = memObjects_.begin(); - itr != memObjects_.end(); itr++) { - device::Memory* mem = 
(*itr)->getDeviceMemory(queue()->device()); + for (auto& it = memObjects_.cbegin(); it != memObjects_.cend(); it++) { + device::Memory* mem = (*it)->getDeviceMemory(queue()->device()); if (NULL == mem) { - std::vector::const_iterator tmpItr; - for (tmpItr = memObjects_.begin(); tmpItr != itr; tmpItr++) { - device::Memory* tmpMem = (*tmpItr)->getDeviceMemory(queue()->device()); + for (auto& tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) { + device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device()); delete tmpMem; } - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize()); + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize()); return false; } } diff --git a/projects/clr/rocclr/runtime/platform/command.hpp b/projects/clr/rocclr/runtime/platform/command.hpp index 49fc25dfc6..424340e3c0 100644 --- a/projects/clr/rocclr/runtime/platform/command.hpp +++ b/projects/clr/rocclr/runtime/platform/command.hpp @@ -720,10 +720,9 @@ class MigrateMemObjectsCommand : public Command { const std::vector& memObjects, cl_mem_migration_flags flags) : Command(queue, type, eventWaitList), migrationFlags_(flags) { - std::vector::const_iterator itr; - for (itr = memObjects.begin(); itr != memObjects.end(); itr++) { - (*itr)->retain(); - memObjects_.push_back(*itr); + for (const auto& it : memObjects) { + it->retain(); + memObjects_.push_back(it); } } @@ -731,9 +730,8 @@ class MigrateMemObjectsCommand : public Command { //! 
Release all resources associated with this command void releaseResources() { - std::vector::const_iterator itr; - for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { - (*itr)->release(); + for (const auto& it : memObjects_) { + it->release(); } Command::releaseResources(); } @@ -837,18 +835,16 @@ class ExtObjectsCommand : public Command { ExtObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : Command(queue, type, eventWaitList) { - for (std::vector::const_iterator itr = memoryObjects.begin(); - itr != memoryObjects.end(); itr++) { - (*itr)->retain(); - memObjects_.push_back(*itr); + for (const auto& it : memoryObjects) { + it->retain(); + memObjects_.push_back(it); } } //! Release all resources associated with this command void releaseResources() { - for (std::vector::const_iterator itr = memObjects_.begin(); - itr != memObjects_.end(); itr++) { - (*itr)->release(); + for (const auto& it : memObjects_) { + it->release(); } Command::releaseResources(); } @@ -954,9 +950,8 @@ class ThreadTraceMemObjectsCommand : public Command { //! 
Release all resources associated with this command void releaseResources() { threadTrace_.release(); - for (std::vector::const_iterator itr = memObjects_.begin(); - itr != memObjects_.end(); itr++) { - (*itr)->release(); + for (const auto& itr : memObjects_) { + itr->release(); } Command::releaseResources(); } @@ -1067,19 +1062,17 @@ class MakeBuffersResidentCommand : public Command { const std::vector& memObjects, cl_bus_address_amd* busAddr) : Command(queue, type, eventWaitList), busAddresses_(busAddr) { - std::vector::const_iterator itr; - for (itr = memObjects.begin(); itr != memObjects.end(); itr++) { - (*itr)->retain(); - memObjects_.push_back(*itr); + for (const auto& it : memObjects) { + it->retain(); + memObjects_.push_back(it); } } virtual void submit(device::VirtualDevice& device) { device.submitMakeBuffersResident(*this); } void releaseResources() { - std::vector::const_iterator itr; - for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { - (*itr)->release(); + for (const auto& it : memObjects_) { + it->release(); } Command::releaseResources(); } diff --git a/projects/clr/rocclr/runtime/platform/commandqueue.cpp b/projects/clr/rocclr/runtime/platform/commandqueue.cpp index 6a452042ce..3604820d47 100644 --- a/projects/clr/rocclr/runtime/platform/commandqueue.cpp +++ b/projects/clr/rocclr/runtime/platform/commandqueue.cpp @@ -96,15 +96,14 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) { // Process the command's event wait list. const Command::EventWaitList& events = command->eventWaitList(); - Command::EventWaitList::const_iterator it; bool dependencyFailed = false; - for (it = events.begin(); it != events.end(); ++it) { + for (const auto& it : events) { // Only wait if the command is enqueued into another queue. 
- if ((*it)->command().queue() != this) { + if (it->command().queue() != this) { virtualDevice->flush(head, true); tail = head = NULL; - dependencyFailed |= !(*it)->awaitCompletion(); + dependencyFailed |= !it->awaitCompletion(); } } diff --git a/projects/clr/rocclr/runtime/platform/context.cpp b/projects/clr/rocclr/runtime/platform/context.cpp index 15faac4396..e05e866c18 100644 --- a/projects/clr/rocclr/runtime/platform/context.cpp +++ b/projects/clr/rocclr/runtime/platform/context.cpp @@ -62,10 +62,9 @@ Context::~Context() { // Dissociate OCL context with any external device if (info_.flags_ & (GLDeviceKhr | D3D10DeviceKhr | D3D11DeviceKhr)) { - std::vector::const_iterator it; // Loop through all devices - for (it = devices_.begin(); it != devices_.end(); it++) { - (*it)->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY); + for (const auto& it : devices_) { + it->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY); } } @@ -218,10 +217,9 @@ int Context::create(const intptr_t* properties) { // Check if OCL context can be associated with any external device if (info_.flags_ & (D3D10DeviceKhr | D3D11DeviceKhr | GLDeviceKhr | D3D9DeviceKhr | D3D9DeviceEXKhr | D3D9DeviceVAKhr)) { - std::vector::const_iterator it; // Loop through all devices - for (it = devices_.begin(); it != devices_.end(); it++) { - if (!(*it)->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) { + for (const auto& it : devices_) { + if (!it->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) { result = CL_INVALID_VALUE; } } @@ -331,10 +329,9 @@ void Context::svmFree(void* ptr) const { } bool Context::containsDevice(const Device* device) const { - std::vector::const_iterator it; - for (it = devices_.begin(); it != devices_.end(); ++it) { - if (device == *it || (*it)->isAncestor(device)) { + for (const auto& it : devices_) { + if (device == it || it->isAncestor(device)) { return true; } } @@ -342,8 
+339,8 @@ bool Context::containsDevice(const Device* device) const { } DeviceQueue* Context::defDeviceQueue(const Device& dev) const { - std::map::const_iterator it = deviceQueues_.find(&dev); - if (it != deviceQueues_.end()) { + const auto it = deviceQueues_.find(&dev); + if (it != deviceQueues_.cend()) { return it->second.defDeviceQueue_; } else { return NULL; diff --git a/projects/clr/rocclr/runtime/platform/context.hpp b/projects/clr/rocclr/runtime/platform/context.hpp index dae6cd3449..be05d5cbee 100644 --- a/projects/clr/rocclr/runtime/platform/context.hpp +++ b/projects/clr/rocclr/runtime/platform/context.hpp @@ -11,7 +11,7 @@ #include "platform/agent.hpp" #include -#include +#include namespace amd { @@ -197,8 +197,8 @@ class Context : public RuntimeObject { GLFunctions* glenv_; //!< OpenGL context Device* customHostAllocDevice_; //!< Device responsible for host allocations std::vector svmAllocDevice_; //!< Devices can support SVM allocations - std::map deviceQueues_; //!< Device queues mapping - mutable Monitor ctxLock_; //!< Lock for the context access + std::unordered_map deviceQueues_; //!< Device queues mapping + mutable Monitor ctxLock_; //!< Lock for the context access }; /*! @} diff --git a/projects/clr/rocclr/runtime/platform/memory.hpp b/projects/clr/rocclr/runtime/platform/memory.hpp index 227738f888..a70a00644b 100644 --- a/projects/clr/rocclr/runtime/platform/memory.hpp +++ b/projects/clr/rocclr/runtime/platform/memory.hpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace device { class Memory; @@ -124,7 +125,7 @@ class Memory : public amd::RuntimeObject { DeviceMemory* deviceMemories_; //! The device alloced state - std::map deviceAlloced_; + std::unordered_map deviceAlloced_; //! Linked list of destructor callbacks. 
std::atomic destructorCallbacks_; diff --git a/projects/clr/rocclr/runtime/platform/perfctr.hpp b/projects/clr/rocclr/runtime/platform/perfctr.hpp index 5345b31e56..fe2a01aafa 100644 --- a/projects/clr/rocclr/runtime/platform/perfctr.hpp +++ b/projects/clr/rocclr/runtime/platform/perfctr.hpp @@ -24,7 +24,7 @@ namespace amd { */ class PerfCounter : public RuntimeObject { public: - typedef std::map Properties; + typedef std::unordered_map Properties; //! Constructor of the performance counter object PerfCounter(const Device& device, //!< device object diff --git a/projects/clr/rocclr/runtime/platform/program.cpp b/projects/clr/rocclr/runtime/platform/program.cpp index 087ffb5fb4..ffcd627b3e 100644 --- a/projects/clr/rocclr/runtime/platform/program.cpp +++ b/projects/clr/rocclr/runtime/platform/program.cpp @@ -21,13 +21,12 @@ namespace amd { Program::~Program() { // Destroy all device programs - deviceprograms_t::const_iterator it, itEnd; - for (it = devicePrograms_.begin(), itEnd = devicePrograms_.end(); it != itEnd; ++it) { - delete it->second; + for (const auto& it : devicePrograms_) { + delete it.second; } - for (devicebinary_t::const_iterator IT = binary_.begin(), IE = binary_.end(); IT != IE; ++IT) { - const binary_t& Bin = IT->second; + for (const auto& it : binary_) { + const binary_t& Bin = it.second; if (Bin.first) { delete[] Bin.first; } @@ -43,8 +42,8 @@ const Symbol* Program::findSymbol(const char* kernelName) const { return NULL; } - symbols_t::const_iterator it = symbolTable_->find(kernelName); - return (it == symbolTable_->end()) ? NULL : &it->second; + const auto it = symbolTable_->find(kernelName); + return (it == symbolTable_->cend()) ? 
NULL : &it->second; } cl_int Program::addDeviceProgram(Device& device, const void* image, size_t length, @@ -151,8 +150,8 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt } device::Program* Program::getDeviceProgram(const Device& device) const { - deviceprograms_t::const_iterator it = devicePrograms_.find(&device.rootDevice()); - if (it == devicePrograms_.end()) { + const auto it = devicePrograms_.find(&device.rootDevice()); + if (it == devicePrograms_.cend()) { return NULL; } return it->second; @@ -198,16 +197,15 @@ cl_int Program::compile(const std::vector& devices, size_t numHeaders, } // Compile the program programs associated with the given devices. - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - device::Program* devProgram = getDeviceProgram(**it); + for (const auto& it : devices) { + device::Program* devProgram = getDeviceProgram(*it); if (devProgram == NULL) { - const binary_t& bin = binary(**it); - retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions); + const binary_t& bin = binary(*it); + retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } - devProgram = getDeviceProgram(**it); + devProgram = getDeviceProgram(*it); } if (devProgram->type() == device::Program::TYPE_INTERMEDIATE || language_ == SPIRV) { @@ -277,8 +275,7 @@ cl_int Program::link(const std::vector& devices, size_t numInputs, } // Link the program programs associated with the given devices. 
- std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { + for (const auto& it : devices) { // find the corresponding device program in each input program std::vector inputDevPrograms(numInputs); bool found = false; @@ -288,8 +285,8 @@ cl_int Program::link(const std::vector& devices, size_t numInputs, parsedOptions.oVariables->BinaryIsSpirv = true; } deviceprograms_t inputDevProgs = inputProgram.devicePrograms(); - deviceprograms_t::const_iterator findIt = inputDevProgs.find(*it); - if (findIt == inputDevProgs.end()) { + const auto findIt = inputDevProgs.find(it); + if (findIt == inputDevProgs.cend()) { if (found) break; continue; } @@ -328,14 +325,14 @@ cl_int Program::link(const std::vector& devices, size_t numInputs, return CL_INVALID_VALUE; } - device::Program* devProgram = getDeviceProgram(**it); + device::Program* devProgram = getDeviceProgram(*it); if (devProgram == NULL) { - const binary_t& bin = binary(**it); - retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions); + const binary_t& bin = binary(*it); + retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } - devProgram = getDeviceProgram(**it); + devProgram = getDeviceProgram(*it); } // We only build a Device-Program once @@ -359,16 +356,14 @@ cl_int Program::link(const std::vector& devices, size_t numInputs, } // Rebuild the symbol table - deviceprograms_t::iterator sit; - for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) { - const Device& device = *sit->first; - const device::Program& program = *sit->second; + for (const auto& sit : devicePrograms_) { + const Device& device = *(sit.first); + const device::Program& program = *(sit.second); const device::Program::kernels_t& kernels = program.kernels(); - device::Program::kernels_t::const_iterator kit; - for (kit = kernels.begin(); kit != kernels.end(); ++kit) { - const std::string& name = kit->first; - const 
device::Kernel* devKernel = kit->second; + for (const auto& it : kernels) { + const std::string& name = it.first; + const device::Kernel* devKernel = it.second; Symbol& symbol = (*symbolTable_)[name]; if (!symbol.setDeviceKernel(device, devKernel)) { @@ -379,9 +374,8 @@ cl_int Program::link(const std::vector& devices, size_t numInputs, // Create a string with all kernel names from the program if (kernelNames_.length() == 0) { - amd::Program::symbols_t::const_iterator it; - for (it = symbols().begin(); it != symbols().end(); ++it) { - if (it != symbols().begin()) { + for (auto it = symbols().cbegin(); it != symbols().cend(); ++it) { + if (it != symbols().cbegin()) { kernelNames_.append(1, ';'); } kernelNames_.append(it->first.c_str()); @@ -474,20 +468,19 @@ cl_int Program::build(const std::vector& devices, const char* options, } // Build the program programs associated with the given devices. - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - device::Program* devProgram = getDeviceProgram(**it); + for (const auto& it : devices) { + device::Program* devProgram = getDeviceProgram(*it); if (devProgram == NULL) { - const binary_t& bin = binary(**it); + const binary_t& bin = binary(*it); if (sourceCode_.empty() && (bin.first == NULL)) { retval = false; continue; } - retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions); + retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } - devProgram = getDeviceProgram(**it); + devProgram = getDeviceProgram(*it); } parsedOptions.oVariables->AssumeAlias = true; @@ -518,16 +511,14 @@ cl_int Program::build(const std::vector& devices, const char* options, } // Rebuild the symbol table - deviceprograms_t::iterator sit; - for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) { - const Device& device = *sit->first; - const device::Program& program = *sit->second; + for (const auto& it : 
devicePrograms_) { + const Device& device = *(it.first); + const device::Program& program = *(it.second); const device::Program::kernels_t& kernels = program.kernels(); - device::Program::kernels_t::const_iterator kit; - for (kit = kernels.begin(); kit != kernels.end(); ++kit) { - const std::string& name = kit->first; - const device::Kernel* devKernel = kit->second; + for (const auto& kit : kernels) { + const std::string& name = kit.first; + const device::Kernel* devKernel = kit.second; Symbol& symbol = (*symbolTable_)[name]; if (!symbol.setDeviceKernel(device, devKernel)) { @@ -538,9 +529,8 @@ cl_int Program::build(const std::vector& devices, const char* options, // Create a string with all kernel names from the program if (kernelNames_.length() == 0) { - amd::Program::symbols_t::const_iterator it; - for (it = symbols().begin(); it != symbols().end(); ++it) { - if (it != symbols().begin()) { + for (auto it = symbols().cbegin(); it != symbols().cend(); ++it) { + if (it != symbols().cbegin()) { kernelNames_.append(1, ';'); } kernelNames_.append(it->first.c_str()); @@ -555,12 +545,10 @@ cl_int Program::build(const std::vector& devices, const char* options, } void Program::clear() { - deviceprograms_t::iterator sit; - // Destroy old programs if we have any - for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) { + for (const auto& it : devicePrograms_) { // Destroy device program - delete sit->second; + delete it.second; } devicePrograms_.clear(); @@ -631,13 +619,13 @@ bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func, b const device::Kernel* Symbol::getDeviceKernel(const Device& device, bool noAlias) const { const devicekernels_t* devKernels = (noAlias) ? 
&deviceKernels_ : &devKernelsNoOpt_; - devicekernels_t::const_iterator itEnd = devKernels->end(); - devicekernels_t::const_iterator it = devKernels->find(&device); + const auto itEnd = devKernels->cend(); + auto it = devKernels->find(&device); if (it != itEnd) { return it->second; } - for (it = devKernels->begin(); it != itEnd; ++it) { + for (it = devKernels->cbegin(); it != itEnd; ++it) { if (it->first->isAncestor(&device)) { return it->second; } diff --git a/projects/clr/rocclr/runtime/platform/program.hpp b/projects/clr/rocclr/runtime/platform/program.hpp index b22f82d322..597a03231c 100644 --- a/projects/clr/rocclr/runtime/platform/program.hpp +++ b/projects/clr/rocclr/runtime/platform/program.hpp @@ -35,7 +35,7 @@ namespace amd { //! A kernel function symbol class Symbol : public HeapObject { public: - typedef std::map devicekernels_t; + typedef std::unordered_map devicekernels_t; private: devicekernels_t deviceKernels_; //! All device kernels objects. @@ -68,9 +68,9 @@ class Program : public RuntimeObject { public: typedef std::pair binary_t; typedef std::set devicelist_t; - typedef std::map devicebinary_t; - typedef std::map deviceprograms_t; - typedef std::map symbols_t; + typedef std::unordered_map devicebinary_t; + typedef std::unordered_map deviceprograms_t; + typedef std::unordered_map symbols_t; enum Language { Binary = 0, diff --git a/projects/clr/rocclr/runtime/platform/sampler.hpp b/projects/clr/rocclr/runtime/platform/sampler.hpp index 1e3b01cbbb..bb273fb9d2 100644 --- a/projects/clr/rocclr/runtime/platform/sampler.hpp +++ b/projects/clr/rocclr/runtime/platform/sampler.hpp @@ -14,7 +14,7 @@ namespace amd { //! Abstraction layer sampler class class Sampler : public RuntimeObject { public: - typedef std::map DeviceSamplers; + typedef std::unordered_map DeviceSamplers; //! \note the sampler states must match the compiler's defines. //! 
See amd_ocl_sys_predef.c diff --git a/projects/clr/rocclr/runtime/utils/flags.cpp b/projects/clr/rocclr/runtime/utils/flags.cpp index be0526414d..d45ec6b5a3 100644 --- a/projects/clr/rocclr/runtime/utils/flags.cpp +++ b/projects/clr/rocclr/runtime/utils/flags.cpp @@ -5,7 +5,7 @@ #include "top.hpp" #include "utils/flags.hpp" -#include +#include #include #include #include @@ -75,7 +75,7 @@ void Flag::tearDown() { } bool Flag::init() { - typedef std::map vars_type; + typedef std::unordered_map vars_type; vars_type vars; #ifdef _WIN32 @@ -116,8 +116,8 @@ bool Flag::init() { for (size_t i = 0; i < numFlags_; ++i) { Flag& flag = flags_[i]; - vars_type::iterator it = vars.find(flag.name_); - if (it != vars.end()) { + const auto it = vars.find(flag.name_); + if (it != vars.cend()) { flag.setValue(it->second); } }