P4 to Git Change 1536925 by vsytchen@vsytchen-ocl-win10 on 2018/04/04 17:20:38
SWDEV-79445 - OCL generic changes and code clean-up
1. Replace the use of std::map with std::unordered_map to improve lookup/insert time.
2. Replace the use of std::make_pair and the std::pair constructor with uniform (brace) initialization for cleaner code.
3. Replace explicit container iterator types (e.g. std::vector<T>::iterator) with the auto keyword for cleaner code.
4. Use range-based for loops where needed.
ReviewBoardURL = http://ocltc.amd.com/reviews/r/14517/diff/
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#57 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#46 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#216 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#297 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#158 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#587 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#322 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#46 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#237 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#242 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#415 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#143 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#46 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/CMakeLists.txt#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#89 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#49 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#102 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#91 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#17 edit
[ROCm/clr commit: d09ca72f74]
このコミットが含まれているのは:
@@ -4,7 +4,7 @@
|
||||
#ifndef APPPROFILE_HPP_
|
||||
#define APPPROFILE_HPP_
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
|
||||
namespace amd {
|
||||
@@ -34,7 +34,7 @@ class AppProfile {
|
||||
void* data_; //!< Pointer to the data
|
||||
};
|
||||
|
||||
typedef std::map<std::string, PropertyData> DataMap;
|
||||
typedef std::unordered_map<std::string, PropertyData> DataMap;
|
||||
|
||||
DataMap propertyDataMap_;
|
||||
std::string appFileName_; // without extension
|
||||
|
||||
@@ -702,9 +702,8 @@ bool Program::compileImpl(const std::string& sourceCode,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
@@ -1028,8 +1027,8 @@ bool Program::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
#if defined(WITH_ONLINE_COMPILER)
|
||||
std::vector<std::string*> llvmBinaries(inputPrograms.size());
|
||||
std::vector<amd::OclElf::oclElfSections> elfSectionType(inputPrograms.size());
|
||||
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
|
||||
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
|
||||
auto it = inputPrograms.cbegin();
|
||||
const auto itEnd = inputPrograms.cend();
|
||||
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
||||
Program* program = (Program*)*it;
|
||||
|
||||
|
||||
@@ -430,10 +430,9 @@ void VirtualCPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd) {
|
||||
|
||||
//! Go through ext objects by one and call member function to execute
|
||||
//! a sequence of external graphics API commands for each external object
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = cmd.getMemList().begin();
|
||||
itr != cmd.getMemList().end(); itr++) {
|
||||
if (*itr) {
|
||||
bError |= !((*itr)->mapExtObjectInCQThread());
|
||||
for (const auto& it : cmd.getMemList()) {
|
||||
if (it) {
|
||||
bError |= !(it->mapExtObjectInCQThread());
|
||||
}
|
||||
}
|
||||
if (bError) {
|
||||
@@ -453,10 +452,9 @@ void VirtualCPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& cmd) {
|
||||
|
||||
bool bError = false;
|
||||
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = cmd.getMemList().begin();
|
||||
itr != cmd.getMemList().end(); itr++) {
|
||||
if (*itr) {
|
||||
bError |= !((*itr)->unmapExtObjectInCQThread());
|
||||
for (const auto& it : cmd.getMemList()) {
|
||||
if (it) {
|
||||
bError |= !(it->unmapExtObjectInCQThread());
|
||||
}
|
||||
}
|
||||
if (bError) {
|
||||
|
||||
@@ -72,7 +72,7 @@ size_t SvmManager::size() {
|
||||
|
||||
void SvmManager::AddSvmBuffer(const void* k, amd::Memory* v) {
|
||||
amd::ScopedLock lock(AllocatedLock_);
|
||||
svmBufferMap_.insert(std::pair<uintptr_t, amd::Memory*>(reinterpret_cast<uintptr_t>(k), v));
|
||||
svmBufferMap_.insert({reinterpret_cast<uintptr_t>(k), v});
|
||||
}
|
||||
|
||||
void SvmManager::RemoveSvmBuffer(const void* k) {
|
||||
@@ -83,7 +83,7 @@ void SvmManager::RemoveSvmBuffer(const void* k) {
|
||||
amd::Memory* SvmManager::FindSvmBuffer(const void* k) {
|
||||
amd::ScopedLock lock(AllocatedLock_);
|
||||
uintptr_t key = reinterpret_cast<uintptr_t>(k);
|
||||
std::map<uintptr_t, amd::Memory*>::iterator it = svmBufferMap_.upper_bound(key);
|
||||
auto it = svmBufferMap_.upper_bound(key);
|
||||
if (it == svmBufferMap_.begin()) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -320,8 +320,7 @@ device::Memory* Device::findMemoryFromVA(const void* ptr, size_t* offset) const
|
||||
amd::ScopedLock lk(*vaCacheAccess_);
|
||||
|
||||
uintptr_t key = reinterpret_cast<uintptr_t>(ptr);
|
||||
std::map<uintptr_t, device::Memory*>::iterator it =
|
||||
vaCacheMap_->upper_bound(reinterpret_cast<uintptr_t>(ptr));
|
||||
auto it = vaCacheMap_->upper_bound(reinterpret_cast<uintptr_t>(ptr));
|
||||
if (it == vaCacheMap_->begin()) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -352,10 +351,10 @@ std::vector<Device*> Device::getDevices(cl_device_type type, bool offlineDevices
|
||||
}
|
||||
|
||||
// Create the list of available devices
|
||||
for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) {
|
||||
for (const auto& it : *devices_) {
|
||||
// Check if the device type is matched
|
||||
if ((*it)->IsTypeMatching(type, offlineDevices)) {
|
||||
result.push_back(*it);
|
||||
if (it->IsTypeMatching(type, offlineDevices)) {
|
||||
result.push_back(it);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -369,9 +368,9 @@ size_t Device::numDevices(cl_device_type type, bool offlineDevices) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) {
|
||||
for (const auto& it : *devices_) {
|
||||
// Check if the device type is matched
|
||||
if ((*it)->IsTypeMatching(type, offlineDevices)) {
|
||||
if (it->IsTypeMatching(type, offlineDevices)) {
|
||||
++result;
|
||||
}
|
||||
}
|
||||
@@ -393,7 +392,7 @@ bool Device::getDeviceIDs(cl_device_type deviceType, cl_uint numEntries, cl_devi
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<amd::Device*>::iterator it = ret.begin();
|
||||
auto it = ret.cbegin();
|
||||
cl_uint count = std::min(numEntries, (cl_uint)ret.size());
|
||||
|
||||
while (count--) {
|
||||
@@ -707,7 +706,7 @@ void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin,
|
||||
|
||||
// Insert into the map if it's the first region
|
||||
if (++pInfo->count_ == 1) {
|
||||
writeMapInfo_.insert(std::pair<const void*, WriteMapInfo>(mapAddress, info));
|
||||
writeMapInfo_.insert({mapAddress, info});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -729,9 +728,8 @@ Program::~Program() { clear(); }
|
||||
|
||||
void Program::clear() {
|
||||
// Destroy all device kernels
|
||||
kernels_t::const_iterator it;
|
||||
for (it = kernels_.begin(); it != kernels_.end(); ++it) {
|
||||
delete it->second;
|
||||
for (const auto& it : kernels_) {
|
||||
delete it.second;
|
||||
}
|
||||
kernels_.clear();
|
||||
}
|
||||
@@ -1035,8 +1033,8 @@ cl_int Program::build(const std::string& sourceCode, const char* origOptions,
|
||||
bool Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
|
||||
const amd::option::Options* linkOptions) {
|
||||
amd::option::Options compileOptions;
|
||||
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
|
||||
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
|
||||
auto it = inputPrograms.cbegin();
|
||||
const auto itEnd = inputPrograms.cend();
|
||||
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
||||
Program* program = *it;
|
||||
|
||||
@@ -1473,7 +1471,7 @@ bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Program::binary_t ClBinary::data() const { return std::make_pair(binary_, size_); }
|
||||
Program::binary_t ClBinary::data() const { return {binary_, size_}; }
|
||||
|
||||
bool ClBinary::setBinary(const char* theBinary, size_t theBinarySize, bool allocated) {
|
||||
release();
|
||||
|
||||
@@ -828,9 +828,9 @@ class Memory : public amd::HeapObject {
|
||||
//! NB, the map data below is for an API-level map (from clEnqueueMapBuffer),
|
||||
//! not a physical map. When a memory object does not use USE_HOST_PTR we
|
||||
//! can use a remote resource and DMA, avoiding the additional CPU memcpy.
|
||||
amd::Memory* mapMemory_; //!< Memory used as map target buffer
|
||||
volatile size_t indirectMapCount_; //!< Number of maps
|
||||
std::map<const void*, WriteMapInfo> writeMapInfo_; //!< Saved write map info for partial unmap
|
||||
amd::Memory* mapMemory_; //!< Memory used as map target buffer
|
||||
volatile size_t indirectMapCount_; //!< Number of maps
|
||||
std::unordered_map<const void*, WriteMapInfo> writeMapInfo_; //!< Saved write map info for partial unmap
|
||||
|
||||
//! Increment map count
|
||||
void incIndMapCount() { ++indirectMapCount_; }
|
||||
@@ -1017,7 +1017,7 @@ class Kernel : public amd::HeapObject {
|
||||
class Program : public amd::HeapObject {
|
||||
public:
|
||||
typedef std::pair<const void*, size_t> binary_t;
|
||||
typedef std::map<std::string, Kernel*> kernels_t;
|
||||
typedef std::unordered_map<std::string, Kernel*> kernels_t;
|
||||
// type of the program
|
||||
typedef enum {
|
||||
TYPE_NONE = 0, // uncompiled
|
||||
@@ -1347,14 +1347,14 @@ class ClBinary : public amd::HeapObject {
|
||||
|
||||
inline const Program::binary_t Program::binary() const {
|
||||
if (clBinary() == NULL) {
|
||||
return std::make_pair((const void*)0, 0);
|
||||
return {(const void*)0, 0};
|
||||
}
|
||||
return clBinary()->data();
|
||||
}
|
||||
|
||||
inline Program::binary_t Program::binary() {
|
||||
if (clBinary() == NULL) {
|
||||
return std::make_pair((const void*)0, 0);
|
||||
return {(const void*)0, 0};
|
||||
}
|
||||
return clBinary()->data();
|
||||
}
|
||||
@@ -1750,7 +1750,6 @@ class Device : public RuntimeObject {
|
||||
static AppProfile* rocAppProfile_;
|
||||
#endif
|
||||
|
||||
typedef std::vector<Device*>::iterator device_iterator;
|
||||
static std::vector<Device*>* devices_; //!< All known devices
|
||||
|
||||
Device* parent_; //!< This device's parent
|
||||
|
||||
@@ -11,10 +11,8 @@ namespace gpu {
|
||||
|
||||
AppProfile::AppProfile()
|
||||
: amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) {
|
||||
propertyDataMap_.insert(DataMap::value_type(
|
||||
"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)));
|
||||
propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)});
|
||||
|
||||
propertyDataMap_.insert(
|
||||
DataMap::value_type("OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
|
||||
propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,16 +73,14 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
functionNameMap[] maps from a function name (linkage name in the generated code)
|
||||
to ElfSymbol_t, which is defined as above.
|
||||
*/
|
||||
std::map<std::string, ElfSymbol_t*> functionNameMap;
|
||||
std::unordered_map<std::string, ElfSymbol_t*> functionNameMap;
|
||||
|
||||
// Keep all kernel ILs if -use-debugil is present (gpu debugging)
|
||||
std::map<std::string, std::string> kernelILs;
|
||||
std::unordered_map<std::string, std::string> kernelILs;
|
||||
|
||||
~TempWrapper() {
|
||||
std::map<std::string, ElfSymbol_t *>::iterator I, IB = functionNameMap.begin(),
|
||||
IE = functionNameMap.end();
|
||||
for (I = IB; I != IE; ++I) {
|
||||
delete[](*I).second;
|
||||
for (const auto& it : functionNameMap) {
|
||||
delete[] it.second;
|
||||
}
|
||||
|
||||
kernelILs.clear();
|
||||
@@ -177,10 +175,8 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
}
|
||||
|
||||
// Append all function metadata to debugIL
|
||||
std::map<std::string, ElfSymbol_t *>::iterator I, IB = tempObj.functionNameMap.begin(),
|
||||
IE = tempObj.functionNameMap.end();
|
||||
for (I = IB; I != IE; ++I) {
|
||||
ElfSymbol_t* elfsymbol = (*I).second;
|
||||
for (const auto& it : tempObj.functionNameMap) {
|
||||
ElfSymbol_t* elfsymbol = it.second;
|
||||
if (elfsymbol == NULL) {
|
||||
// Not valid, skip
|
||||
continue;
|
||||
@@ -202,11 +198,9 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
}
|
||||
|
||||
// Now, patch the IL from debugIL into functionNameMap[]
|
||||
std::map<std::string, std::string>::iterator KI, KIB = tempObj.kernelILs.begin(),
|
||||
KIE = tempObj.kernelILs.end();
|
||||
for (KI = KIB; KI != KIE; ++KI) {
|
||||
const std::string& kn = (*KI).first;
|
||||
const std::string& ilstr = (*KI).second;
|
||||
for (const auto& it : tempObj.kernelILs) {
|
||||
const std::string& kn = it.first;
|
||||
const std::string& ilstr = it.second;
|
||||
|
||||
ElfSymbol_t* elfsymbol = tempObj.functionNameMap[kn];
|
||||
if (elfsymbol == NULL) {
|
||||
@@ -225,10 +219,8 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
|
||||
bool recompiled = false;
|
||||
bool hasKernels = false;
|
||||
std::map<std::string, ElfSymbol_t *>::iterator I, IB = tempObj.functionNameMap.begin(),
|
||||
IE = tempObj.functionNameMap.end();
|
||||
for (I = IB; I != IE; ++I) {
|
||||
ElfSymbol_t* elfsymbol = (*I).second;
|
||||
for (const auto& it : tempObj.functionNameMap) {
|
||||
ElfSymbol_t* elfsymbol = it.second;
|
||||
if (elfsymbol == NULL) {
|
||||
// Not valid, skip
|
||||
continue;
|
||||
@@ -237,7 +229,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
// and the new binary is needed.
|
||||
if (saveAMDIL() && (elfsymbol->SymInfo[NDX_METADATA].size > 0)) {
|
||||
std::string fmetadata = "__OpenCL_";
|
||||
fmetadata.append((*I).first);
|
||||
fmetadata.append(it.first);
|
||||
fmetadata.append("_fmetadata");
|
||||
|
||||
if (!elfOut()->addSymbol(amd::OclElf::RODATA, fmetadata.c_str(),
|
||||
@@ -250,7 +242,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
continue;
|
||||
}
|
||||
amd::OclElf::SymbolInfo* sinfo = &(elfsymbol->SymInfo[0]);
|
||||
std::string FName = (*I).first;
|
||||
std::string FName = it.first;
|
||||
|
||||
// For this kernel, get the demangled kernel name, which is used to identify each kernel.
|
||||
const size_t name_sz = FName.size() - (sizeof(_kernel) - 1) - (sizeof(__OpenCL_) - 1);
|
||||
|
||||
@@ -74,9 +74,8 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
@@ -355,9 +354,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
|
||||
@@ -1152,7 +1152,7 @@ device::Program* Device::createProgram(amd::option::Options* options) {
|
||||
}
|
||||
|
||||
//! Requested devices list as configured by the GPU_DEVICE_ORDINAL
|
||||
typedef std::map<int, bool> requestedDevices_t;
|
||||
typedef std::unordered_map<int, bool> requestedDevices_t;
|
||||
|
||||
//! Parses the requested list of devices to be exposed to the user.
|
||||
static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) {
|
||||
|
||||
@@ -1349,10 +1349,9 @@ bool Kernel::bindGlobalHwCb(VirtualGPU& gpu, VirtualGPU::GslKernelDesc* desc) co
|
||||
|
||||
// Bind HW constant buffers used for the global data store
|
||||
const Program::HwConstBuffers& gds = prog().glbHwCb();
|
||||
for (Program::HwConstBuffers::const_iterator it = gds.begin(); (it != gds.end() && result);
|
||||
++it) {
|
||||
uint idx = it->first;
|
||||
result = bindResource(gpu, *(it->second), idx, ConstantBuffer, idx);
|
||||
for (const auto& it : gds) {
|
||||
uint idx = it.first;
|
||||
result = bindResource(gpu, *(it.second), idx, ConstantBuffer, idx);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -1535,16 +1534,16 @@ void Kernel::debug(VirtualGPU& gpu) const {
|
||||
}
|
||||
}
|
||||
const Program::HwConstBuffers& gds = prog().glbHwCb();
|
||||
for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) {
|
||||
uint idx = it->first;
|
||||
for (const auto& it : gds) {
|
||||
uint idx = it.first;
|
||||
std::stringstream fileName;
|
||||
fileName << counter++ << "_kernel_" << name() << "_const" << idx << ".bin";
|
||||
stubWrite.open(fileName.str().c_str(), (std::fstream::out | std::fstream::binary));
|
||||
if (stubWrite.is_open()) {
|
||||
address memory = reinterpret_cast<address>((it->second)->map(&gpu, Resource::ReadOnly));
|
||||
address memory = reinterpret_cast<address>(it.second->map(&gpu, Resource::ReadOnly));
|
||||
// Check if we have OpenCL program
|
||||
stubWrite.write(reinterpret_cast<char*>(memory), (it->second)->size());
|
||||
(it->second)->unmap(&gpu);
|
||||
stubWrite.write(reinterpret_cast<char*>(memory), it.second->size());
|
||||
it.second->unmap(&gpu);
|
||||
stubWrite.close();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -543,7 +543,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) {
|
||||
// First DWORD = Offset to where next information is to
|
||||
// be written, initialized to 0
|
||||
// Second DWORD = Number of bytes available for printf data
|
||||
// = buffer size – 2*sizeof(uint32_t)
|
||||
// = buffer size – 2*sizeof(uint32_t)
|
||||
const uint8_t initSize = 2 * sizeof(uint32_t);
|
||||
uint8_t sysMem[initSize];
|
||||
memset(sysMem, 0, initSize);
|
||||
@@ -601,8 +601,6 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
std::vector<uint>::const_iterator ita;
|
||||
uint sb = 0;
|
||||
uint sbt = 0;
|
||||
|
||||
@@ -614,8 +612,8 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
}
|
||||
const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
|
||||
sb += sizeof(uint32_t);
|
||||
for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) {
|
||||
sb += *ita;
|
||||
for (const auto& it : info.arguments_) {
|
||||
sb += it;
|
||||
}
|
||||
|
||||
if (sbt + sb > bufSize) {
|
||||
|
||||
@@ -318,7 +318,7 @@ bool NullProgram::linkImpl(amd::option::Options* options) {
|
||||
std::string metadataStr;
|
||||
std::vector<ILFunc*> notCalled;
|
||||
std::vector<ILFunc*> called;
|
||||
std::map<int, const char**> macros;
|
||||
std::unordered_map<int, const char**> macros;
|
||||
size_t j;
|
||||
Kernel::InitData initData = {0};
|
||||
|
||||
@@ -464,8 +464,8 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
amd::option::Options* options, bool createLibrary) {
|
||||
std::vector<std::string*> llvmBinaries(inputPrograms.size());
|
||||
std::vector<amd::OclElf::oclElfSections> elfSectionType(inputPrograms.size());
|
||||
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
|
||||
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
|
||||
auto it = inputPrograms.cbegin();
|
||||
const auto itEnd = inputPrograms.cend();
|
||||
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
||||
NullProgram* program = (NullProgram*)*it;
|
||||
|
||||
@@ -682,7 +682,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
std::string metadataStr;
|
||||
std::vector<ILFunc*> notCalled;
|
||||
std::vector<ILFunc*> called;
|
||||
std::map<int, const char**> macros;
|
||||
std::unordered_map<int, const char**> macros;
|
||||
size_t j;
|
||||
Kernel::InitData initData = {0};
|
||||
|
||||
@@ -1433,7 +1433,7 @@ NullKernel* NullProgram::createKernel(const std::string& name, const Kernel::Ini
|
||||
}
|
||||
|
||||
// Invoked from ClBinary
|
||||
bool NullProgram::getAllKernelILs(std::map<std::string, std::string>& allKernelILs,
|
||||
bool NullProgram::getAllKernelILs(std::unordered_map<std::string, std::string>& allKernelILs,
|
||||
std::string& programIL, const char* ilKernelName) {
|
||||
llvm::CompUnit compunit(programIL);
|
||||
if (ilKernelName != NULL) {
|
||||
@@ -1471,8 +1471,8 @@ bool NullProgram::createBinary(amd::option::Options* options) {
|
||||
Program::~Program() {
|
||||
// Destroy the global HW constant buffers
|
||||
const Program::HwConstBuffers& gds = glbHwCb();
|
||||
for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) {
|
||||
delete it->second;
|
||||
for (const auto& it : gds) {
|
||||
delete it.second;
|
||||
}
|
||||
|
||||
// Destroy the global data store
|
||||
@@ -1634,8 +1634,8 @@ bool HSAILProgram::finiBuild(bool isBuildGood) {
|
||||
|
||||
bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
amd::option::Options* options, bool createLibrary) {
|
||||
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
|
||||
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
|
||||
auto it = inputPrograms.cbegin();
|
||||
const auto itEnd = inputPrograms.cend();
|
||||
acl_error errorCode;
|
||||
|
||||
// For each program we need to extract the LLVMIR and create
|
||||
@@ -2037,13 +2037,12 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
}
|
||||
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
|
||||
delete [] kernelNames;
|
||||
std::vector<std::string>::iterator it = vKernels.begin();
|
||||
bool dynamicParallelism = false;
|
||||
aclMetadata md;
|
||||
md.numHiddenKernelArgs = 0;
|
||||
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
|
||||
for (it; it != vKernels.end(); ++it) {
|
||||
std::string kernelName(*it);
|
||||
for (const auto& it : vKernels) {
|
||||
std::string kernelName(it);
|
||||
std::string openclKernelName = Kernel::openclMangledName(kernelName);
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
|
||||
openclKernelName.c_str(), &md.numHiddenKernelArgs,
|
||||
|
||||
@@ -256,7 +256,7 @@ class NullProgram : public device::Program {
|
||||
/*! Get all per-kernel IL from programIL, where programIL is the IL for the
|
||||
* whole compilation unit.
|
||||
*/
|
||||
bool getAllKernelILs(std::map<std::string, std::string>& allKernelILs, std::string& programIL,
|
||||
bool getAllKernelILs(std::unordered_map<std::string, std::string>& allKernelILs, std::string& programIL,
|
||||
const char* ilKernelName);
|
||||
|
||||
protected:
|
||||
@@ -322,7 +322,7 @@ class Program : public NullProgram {
|
||||
size_t binarySize = 0 //!< the machine code size
|
||||
);
|
||||
|
||||
typedef std::map<uint, gpu::Memory*> HwConstBuffers;
|
||||
typedef std::unordered_map<uint, gpu::Memory*> HwConstBuffers;
|
||||
|
||||
//! Global HW constant buffers
|
||||
const HwConstBuffers& glbHwCb() const { return constBufs_; }
|
||||
|
||||
@@ -1744,7 +1744,7 @@ bool ResourceCache::addCalResource(Resource::CalResourceDesc* desc, GslResourceR
|
||||
memcpy(descCached, desc, sizeof(Resource::CalResourceDesc));
|
||||
|
||||
// Add the current resource to the cache
|
||||
resCache_.push_front(std::make_pair(descCached, ref));
|
||||
resCache_.push_front({descCached, ref});
|
||||
cacheSize_ += size;
|
||||
result = true;
|
||||
}
|
||||
|
||||
@@ -545,9 +545,9 @@ VirtualGPU::~VirtualGPU() {
|
||||
|
||||
uint i;
|
||||
// Destroy all kernels
|
||||
for (GslKernels::const_iterator it = gslKernels_.begin(); it != gslKernels_.end(); ++it) {
|
||||
if (it->first != 0) {
|
||||
freeKernelDesc(it->second);
|
||||
for (const auto& it : gslKernels_) {
|
||||
if (it.first != 0) {
|
||||
freeKernelDesc(it.second);
|
||||
}
|
||||
}
|
||||
gslKernels_.clear();
|
||||
@@ -1365,10 +1365,9 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd, true);
|
||||
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = vcmd.memObjects().begin(); itr != vcmd.memObjects().end(); ++itr) {
|
||||
for (const auto& it : vcmd.memObjects()) {
|
||||
// Find device memory
|
||||
gpu::Memory* memory = dev().getGpuMemory(*itr);
|
||||
gpu::Memory* memory = dev().getGpuMemory(it);
|
||||
|
||||
if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) {
|
||||
memory->mgpuCacheWriteBack();
|
||||
@@ -2016,7 +2015,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) {
|
||||
|
||||
// Loop through all outstanding command batches
|
||||
while (!cbList_.empty()) {
|
||||
CommandBatchList::const_iterator it = cbList_.begin();
|
||||
const auto it = cbList_.cbegin();
|
||||
// Wait for completion
|
||||
foundEvent = awaitCompletion(*it, vcmd.waitingEvent());
|
||||
// Release a command batch
|
||||
@@ -2210,8 +2209,8 @@ void VirtualGPU::submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand&
|
||||
const size_t memObjSize = cmd.getMemoryObjectSize();
|
||||
const std::vector<amd::Memory*>& memObj = cmd.getMemList();
|
||||
size_t se = 0;
|
||||
for (std::vector<amd::Memory *>::const_iterator itMemObj = memObj.begin();
|
||||
itMemObj != memObj.end(); ++itMemObj, ++se) {
|
||||
for (auto itMemObj = memObj.cbegin();
|
||||
itMemObj != memObj.cend(); ++itMemObj, ++se) {
|
||||
// Find GSL Mem Object
|
||||
gslMemObject gslMemObj = dev().getGpuMemory(*itMemObj)->gslResource();
|
||||
|
||||
@@ -2297,15 +2296,14 @@ void VirtualGPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd);
|
||||
|
||||
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
|
||||
it != vcmd.getMemList().end(); ++it) {
|
||||
for (const auto& it : vcmd.getMemList()) {
|
||||
// amd::Memory object should never be NULL
|
||||
assert(*it && "Memory object for interop is NULL");
|
||||
gpu::Memory* memory = dev().getGpuMemory(*it);
|
||||
assert(it && "Memory object for interop is NULL");
|
||||
gpu::Memory* memory = dev().getGpuMemory(it);
|
||||
|
||||
// If resource is a shared copy of original resource, then
|
||||
// runtime needs to copy data from original resource
|
||||
(*it)->getInteropObj()->copyOrigToShared();
|
||||
it->getInteropObj()->copyOrigToShared();
|
||||
|
||||
// Check if OpenCL has direct access to the interop memory
|
||||
if (memory->interopType() == Memory::InteropDirectAccess) {
|
||||
@@ -2336,11 +2334,10 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd);
|
||||
|
||||
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
|
||||
it != vcmd.getMemList().end(); ++it) {
|
||||
for (const auto& it : vcmd.getMemList()) {
|
||||
// amd::Memory object should never be NULL
|
||||
assert(*it && "Memory object for interop is NULL");
|
||||
gpu::Memory* memory = dev().getGpuMemory(*it);
|
||||
assert(it && "Memory object for interop is NULL");
|
||||
gpu::Memory* memory = dev().getGpuMemory(it);
|
||||
|
||||
// Check if we can use HW interop
|
||||
if (memory->interopType() == Memory::InteropHwEmulation) {
|
||||
@@ -2362,7 +2359,7 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
|
||||
|
||||
// If resource is a shared copy of original resource, then
|
||||
// runtime needs to copy data back to original resource
|
||||
(*it)->getInteropObj()->copySharedToOrig();
|
||||
it->getInteropObj()->copySharedToOrig();
|
||||
}
|
||||
|
||||
profilingEnd(vcmd);
|
||||
@@ -2513,7 +2510,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
wait |= state_.forceWait_;
|
||||
// Loop through all outstanding command batches
|
||||
while (!cbList_.empty()) {
|
||||
CommandBatchList::const_iterator it = cbList_.begin();
|
||||
const auto it = cbList_.cbegin();
|
||||
// Check if command batch finished without a wait
|
||||
bool finished = true;
|
||||
for (uint i = 0; i < AllEngines; ++i) {
|
||||
@@ -2537,8 +2534,8 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); }
|
||||
|
||||
void VirtualGPU::releaseMemObjects(bool scratch) {
|
||||
for (GpuEvents::const_iterator it = gpuEvents_.begin(); it != gpuEvents_.end(); ++it) {
|
||||
GpuEvent event = it->second;
|
||||
for (const auto& it : gpuEvents_) {
|
||||
GpuEvent event = it.second;
|
||||
waitForEvent(&event);
|
||||
}
|
||||
// Unbind all resources, so the queue won't have any bound mem objects
|
||||
|
||||
@@ -380,8 +380,8 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext {
|
||||
) const;
|
||||
|
||||
private:
|
||||
typedef std::map<CALimage, GslKernelDesc*> GslKernels;
|
||||
typedef std::map<gslMemObject, GpuEvent> GpuEvents;
|
||||
typedef std::unordered_map<CALimage, GslKernelDesc*> GslKernels;
|
||||
typedef std::unordered_map<gslMemObject, GpuEvent> GpuEvents;
|
||||
|
||||
//! Finds total amount of necessary iterations
|
||||
inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes
|
||||
|
||||
@@ -11,10 +11,8 @@ namespace pal {
|
||||
|
||||
AppProfile::AppProfile()
|
||||
: amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) {
|
||||
propertyDataMap_.insert(DataMap::value_type(
|
||||
"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)));
|
||||
propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)});
|
||||
|
||||
propertyDataMap_.insert(
|
||||
DataMap::value_type("OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
|
||||
propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,9 +65,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
@@ -282,9 +281,8 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
@@ -343,10 +341,10 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
|
||||
case 100:
|
||||
case 110:
|
||||
case 120:
|
||||
hdr = std::make_pair(opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size);
|
||||
hdr = {opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size};
|
||||
break;
|
||||
case 200:
|
||||
hdr = std::make_pair(opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size);
|
||||
hdr = {opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size};
|
||||
break;
|
||||
default:
|
||||
buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
|
||||
|
||||
@@ -1081,7 +1081,7 @@ device::Program* Device::createProgram(amd::option::Options* options) {
|
||||
}
|
||||
|
||||
//! Requested devices list as configured by the GPU_DEVICE_ORDINAL
|
||||
typedef std::map<int, bool> requestedDevices_t;
|
||||
typedef std::unordered_map<int, bool> requestedDevices_t;
|
||||
|
||||
//! Parses the requested list of devices to be exposed to the user.
|
||||
static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) {
|
||||
|
||||
@@ -539,7 +539,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) {
|
||||
// First DWORD = Offset to where next information is to
|
||||
// be written, initialized to 0
|
||||
// Second DWORD = Number of bytes available for printf data
|
||||
// = buffer size – 2*sizeof(uint32_t)
|
||||
// = buffer size - 2*sizeof(uint32_t)
|
||||
const uint8_t initSize = 2 * sizeof(uint32_t);
|
||||
uint8_t sysMem[initSize];
|
||||
memset(sysMem, 0, initSize);
|
||||
@@ -597,7 +597,6 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint>::const_iterator ita;
|
||||
uint sb = 0;
|
||||
uint sbt = 0;
|
||||
|
||||
@@ -609,8 +608,8 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
}
|
||||
const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
|
||||
sb += sizeof(uint32_t);
|
||||
for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) {
|
||||
sb += *ita;
|
||||
for (const auto& it : info.arguments_) {
|
||||
sb += it;
|
||||
}
|
||||
|
||||
if (sbt + sb > bufSize) {
|
||||
|
||||
@@ -228,8 +228,8 @@ bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
assert(!"Should not reach here");
|
||||
return false;
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
|
||||
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
|
||||
auto it = inputPrograms.cbegin();
|
||||
const auto itEnd = inputPrograms.cend();
|
||||
acl_error errorCode;
|
||||
|
||||
// For each program we need to extract the LLVMIR and create
|
||||
@@ -656,10 +656,9 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
}
|
||||
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
|
||||
delete [] kernelNames;
|
||||
std::vector<std::string>::iterator it = vKernels.begin();
|
||||
bool dynamicParallelism = false;
|
||||
for (it; it != vKernels.end(); ++it) {
|
||||
std::string kernelName(*it);
|
||||
for (const auto& it : vKernels) {
|
||||
std::string kernelName(it);
|
||||
std::string openclKernelName = device::Kernel::openclMangledName(kernelName);
|
||||
|
||||
HSAILKernel* aKernel =
|
||||
|
||||
@@ -1133,7 +1133,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
Pal::GpuMemoryCreateInfo createInfo = {};
|
||||
createInfo.size = desc().width_ * elementSize_;
|
||||
createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
|
||||
createInfo.alignment = MaxGpuAlignment;
|
||||
createInfo.alignment = desc().scratch_ ? 64*Ki : MaxGpuAlignment;
|
||||
createInfo.vaRange = Pal::VaRange::Default;
|
||||
createInfo.priority = Pal::GpuMemPriority::Normal;
|
||||
|
||||
@@ -1970,7 +1970,7 @@ bool ResourceCache::addGpuMemory(Resource::Descriptor* desc,
|
||||
|
||||
amd::ScopedLock l(&lockCacheOps_);
|
||||
// Add the current resource to the cache
|
||||
resCache_.push_front(std::make_pair(descCached, ref));
|
||||
resCache_.push_front({descCached, ref});
|
||||
ref->gpu_ = nullptr;
|
||||
cacheSize_ += size;
|
||||
result = true;
|
||||
|
||||
@@ -1802,10 +1802,9 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd, true);
|
||||
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = vcmd.memObjects().begin(); itr != vcmd.memObjects().end(); ++itr) {
|
||||
for (const auto& it : vcmd.memObjects()) {
|
||||
// Find device memory
|
||||
pal::Memory* memory = dev().getGpuMemory(*itr);
|
||||
pal::Memory* memory = dev().getGpuMemory(it);
|
||||
|
||||
if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) {
|
||||
memory->mgpuCacheWriteBack();
|
||||
@@ -2478,15 +2477,14 @@ void VirtualGPU::submitAcquireExtObjects(amd::AcquireExtObjectsCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd);
|
||||
|
||||
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
|
||||
it != vcmd.getMemList().end(); ++it) {
|
||||
for (const auto& it : vcmd.getMemList()) {
|
||||
// amd::Memory object should never be nullptr
|
||||
assert(*it && "Memory object for interop is nullptr");
|
||||
pal::Memory* memory = dev().getGpuMemory(*it);
|
||||
assert(it && "Memory object for interop is nullptr");
|
||||
pal::Memory* memory = dev().getGpuMemory(it);
|
||||
|
||||
// If resource is a shared copy of original resource, then
|
||||
// runtime needs to copy data from original resource
|
||||
(*it)->getInteropObj()->copyOrigToShared();
|
||||
it->getInteropObj()->copyOrigToShared();
|
||||
|
||||
// Check if OpenCL has direct access to the interop memory
|
||||
if (memory->interopType() == Memory::InteropDirectAccess) {
|
||||
@@ -2517,11 +2515,10 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd);
|
||||
|
||||
for (std::vector<amd::Memory*>::const_iterator it = vcmd.getMemList().begin();
|
||||
it != vcmd.getMemList().end(); ++it) {
|
||||
for (const auto& it : vcmd.getMemList()) {
|
||||
// amd::Memory object should never be nullptr
|
||||
assert(*it && "Memory object for interop is nullptr");
|
||||
pal::Memory* memory = dev().getGpuMemory(*it);
|
||||
assert(it && "Memory object for interop is nullptr");
|
||||
pal::Memory* memory = dev().getGpuMemory(it);
|
||||
|
||||
// Check if we can use HW interop
|
||||
if (memory->interopType() == Memory::InteropHwEmulation) {
|
||||
@@ -2543,7 +2540,7 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) {
|
||||
|
||||
// If resource is a shared copy of original resource, then
|
||||
// runtime needs to copy data back to original resource
|
||||
(*it)->getInteropObj()->copySharedToOrig();
|
||||
it->getInteropObj()->copySharedToOrig();
|
||||
}
|
||||
|
||||
profilingEnd(vcmd);
|
||||
|
||||
@@ -163,7 +163,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
uint cmdBufIdCurrent_; //!< Current global command buffer ID
|
||||
uint cmbBufIdRetired_; //!< The last retired command buffer ID
|
||||
uint cmdCnt_; //!< Counter of commands
|
||||
std::map<GpuMemoryReference*, uint> memReferences_;
|
||||
std::unordered_map<GpuMemoryReference*, uint> memReferences_;
|
||||
Util::VirtualLinearAllocator vlAlloc_;
|
||||
std::vector<Pal::GpuMemoryRef> palMemRefs_;
|
||||
std::vector<Pal::IGpuMemory*> palMems_;
|
||||
|
||||
@@ -51,19 +51,19 @@ foreach(AMDGCN_LIB_TARGET ${AMDGCN_LIB_TARGETS})
|
||||
if (${AMDGCN_LIB_TARGET} MATCHES "^oclc_isa_version_[0-9]+_lib$")
|
||||
string(REGEX REPLACE "^oclc_isa_version_([0-9]+)_lib$" "\\1" gfxip ${AMDGCN_LIB_TARGET})
|
||||
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc
|
||||
"case ${gfxip}: return std::make_pair(oclc_isa_version_${gfxip}_amdgcn, oclc_isa_version_${gfxip}_amdgcn_size); break;\n")
|
||||
"case ${gfxip}: return {oclc_isa_version_${gfxip}_amdgcn, oclc_isa_version_${gfxip}_amdgcn_size}; break;\n")
|
||||
endif()
|
||||
endforeach()
|
||||
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc
|
||||
"default: return std::make_pair((const void*)0,(size_t)0);\n}\n}\n")
|
||||
"default: return {(const void*)0,(size_t)0};\n}\n}\n")
|
||||
|
||||
foreach(AMDGCN_LIB_TARGET ${AMDGCN_LIB_TARGETS})
|
||||
if (${AMDGCN_LIB_TARGET} MATCHES "oclc_(.*)_on_lib")
|
||||
string(REGEX REPLACE "oclc_(.*)_on_lib" "\\1" function ${AMDGCN_LIB_TARGET})
|
||||
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libraries.amdgcn.inc
|
||||
"static inline std::pair<const char*, size_t> get_oclc_${function}(bool on)\n{ return std::make_pair("
|
||||
"static inline std::pair<const char*, size_t> get_oclc_${function}(bool on)\n{ return {"
|
||||
"(const char*)(on ? oclc_${function}_on_amdgcn : oclc_${function}_off_amdgcn),"
|
||||
"on ? oclc_${function}_on_amdgcn_size : oclc_${function}_off_amdgcn_size);}\n")
|
||||
"on ? oclc_${function}_on_amdgcn_size : oclc_${function}_off_amdgcn_size};}\n")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
|
||||
@@ -183,8 +183,7 @@ void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr)
|
||||
flags, &buf_size, &ptr, nullptr, nullptr)) {
|
||||
// Ask GPUPro driver to provide CPU access to allocation
|
||||
if (0 == Funcs().AmdgpuBoCpuMap(buf_handle, host_ptr)) {
|
||||
allocs_.insert(std::pair<void*, std::pair<amdgpu_bo_handle, uint32_t>>(
|
||||
ptr, std::pair<amdgpu_bo_handle, uint32_t>(buf_handle, shared_handle)));
|
||||
allocs_.insert({ptr, {buf_handle, shared_handle}});
|
||||
}
|
||||
else {
|
||||
hsa_amd_interop_unmap_buffer(ptr);
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "profuncs.hpp"
|
||||
#include "prodriver.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
/*! \addtogroup HSA
|
||||
* @{
|
||||
@@ -53,7 +53,7 @@ private:
|
||||
amdgpu_device_handle dev_handle_; //!< AMD gpu device handle
|
||||
amdgpu_gpu_info gpu_info_; //!< GPU info structure
|
||||
amdgpu_heap_info heap_info_; //!< Information about memory
|
||||
mutable std::map<void*, std::pair<amdgpu_bo_handle, uint32_t>> allocs_; //!< Alloced memory mapping
|
||||
mutable std::unordered_map<void*, std::pair<amdgpu_bo_handle, uint32_t>> allocs_; //!< Alloced memory mapping
|
||||
amd::Monitor* alloc_ops_; //!< Serializes memory allocations/destructions
|
||||
};
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
namespace roc {
|
||||
|
||||
typedef std::map<std::string, device::Kernel*> NameKernelMap;
|
||||
typedef std::unordered_map<std::string, device::Kernel*> NameKernelMap;
|
||||
|
||||
class ClBinary : public device::ClBinary {
|
||||
public:
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
|
||||
#include "os/os.hpp"
|
||||
#include "rocdevice.hpp"
|
||||
@@ -80,9 +79,8 @@ bool HSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
@@ -249,9 +247,8 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if (amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
|
||||
it != end; ++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
for (auto& it : headerIncludeName) {
|
||||
if (it == '/') it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
@@ -309,10 +306,10 @@ bool LightningProgram::compileImpl(const std::string& sourceCode,
|
||||
case 100:
|
||||
case 110:
|
||||
case 120:
|
||||
hdr = std::make_pair(opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size);
|
||||
hdr = {opencl1_2_c_amdgcn, opencl1_2_c_amdgcn_size};
|
||||
break;
|
||||
case 200:
|
||||
hdr = std::make_pair(opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size);
|
||||
hdr = {opencl2_0_c_amdgcn, opencl2_0_c_amdgcn_size};
|
||||
break;
|
||||
default:
|
||||
buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
|
||||
|
||||
@@ -336,12 +336,11 @@ uint64_t PerfCounter::getInfo(uint64_t infoType) const {
|
||||
&data);
|
||||
|
||||
uint64_t result = 0;
|
||||
std::vector<hsa_ven_amd_aqlprofile_info_data_t>::iterator it;
|
||||
for (it = data.begin(); it != data.end(); ++it) {
|
||||
if (it->pmc_data.event.block_name == event_.block_name &&
|
||||
it->pmc_data.event.block_index == event_.block_index &&
|
||||
it->pmc_data.event.counter_id == event_.counter_id) {
|
||||
result += it->pmc_data.result;
|
||||
for (const auto& it : data) {
|
||||
if (it.pmc_data.event.block_name == event_.block_name &&
|
||||
it.pmc_data.event.block_index == event_.block_index &&
|
||||
it.pmc_data.event.counter_id == event_.counter_id) {
|
||||
result += it.pmc_data.result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -415,7 +415,6 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint>::const_iterator ita;
|
||||
uint sb = 0;
|
||||
uint sbt = 0;
|
||||
|
||||
@@ -427,8 +426,8 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
}
|
||||
const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
|
||||
sb += sizeof(uint32_t);
|
||||
for (ita = info.arguments_.begin(); ita != info.arguments_.end(); ++ita) {
|
||||
sb += *ita;
|
||||
for (const auto& ita : info.arguments_) {
|
||||
sb += ita;
|
||||
}
|
||||
|
||||
size_t idx = 1;
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
|
||||
namespace roc {
|
||||
|
||||
@@ -535,8 +534,8 @@ aclType HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& compl
|
||||
|
||||
bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
amd::option::Options* options, bool createLibrary) {
|
||||
std::vector<device::Program*>::const_iterator it = inputPrograms.begin();
|
||||
std::vector<device::Program*>::const_iterator itEnd = inputPrograms.end();
|
||||
auto it = inputPrograms.cbegin();
|
||||
const auto itEnd = inputPrograms.cend();
|
||||
acl_error errorCode;
|
||||
|
||||
// For each program we need to extract the LLVMIR and create
|
||||
|
||||
@@ -370,11 +370,10 @@ void UnmapMemoryCommand::releaseResources() {
|
||||
|
||||
bool MigrateMemObjectsCommand::validateMemory() {
|
||||
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
|
||||
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
||||
for (const auto& it : memObjects_) {
|
||||
device::Memory* mem = it->getDeviceMemory(queue()->device());
|
||||
if (NULL == mem) {
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -434,11 +433,10 @@ cl_int NDRangeKernelCommand::validateMemory() {
|
||||
bool ExtObjectsCommand::validateMemory() {
|
||||
bool retVal = true;
|
||||
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
||||
itr != memObjects_.end(); itr++) {
|
||||
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
||||
for (const auto& it : memObjects_) {
|
||||
device::Memory* mem = it->getDeviceMemory(queue()->device());
|
||||
if (NULL == mem) {
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
|
||||
return false;
|
||||
}
|
||||
retVal = processGLResource(mem);
|
||||
@@ -457,11 +455,10 @@ bool ReleaseExtObjectsCommand::processGLResource(device::Memory* mem) {
|
||||
|
||||
bool MakeBuffersResidentCommand::validateMemory() {
|
||||
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
||||
itr != memObjects_.end(); itr++) {
|
||||
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
||||
for (const auto& it : memObjects_) {
|
||||
device::Memory* mem = it->getDeviceMemory(queue()->device());
|
||||
if (NULL == mem) {
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -471,16 +468,14 @@ bool MakeBuffersResidentCommand::validateMemory() {
|
||||
}
|
||||
bool ThreadTraceMemObjectsCommand::validateMemory() {
|
||||
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
||||
itr != memObjects_.end(); itr++) {
|
||||
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
||||
for (auto& it = memObjects_.cbegin(); it != memObjects_.cend(); it++) {
|
||||
device::Memory* mem = (*it)->getDeviceMemory(queue()->device());
|
||||
if (NULL == mem) {
|
||||
std::vector<amd::Memory*>::const_iterator tmpItr;
|
||||
for (tmpItr = memObjects_.begin(); tmpItr != itr; tmpItr++) {
|
||||
device::Memory* tmpMem = (*tmpItr)->getDeviceMemory(queue()->device());
|
||||
for (auto& tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) {
|
||||
device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device());
|
||||
delete tmpMem;
|
||||
}
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*itr)->getSize());
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -720,10 +720,9 @@ class MigrateMemObjectsCommand : public Command {
|
||||
const std::vector<amd::Memory*>& memObjects,
|
||||
cl_mem_migration_flags flags)
|
||||
: Command(queue, type, eventWaitList), migrationFlags_(flags) {
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = memObjects.begin(); itr != memObjects.end(); itr++) {
|
||||
(*itr)->retain();
|
||||
memObjects_.push_back(*itr);
|
||||
for (const auto& it : memObjects) {
|
||||
it->retain();
|
||||
memObjects_.push_back(it);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -731,9 +730,8 @@ class MigrateMemObjectsCommand : public Command {
|
||||
|
||||
//! Release all resources associated with this command
|
||||
void releaseResources() {
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
|
||||
(*itr)->release();
|
||||
for (const auto& it : memObjects_) {
|
||||
it->release();
|
||||
}
|
||||
Command::releaseResources();
|
||||
}
|
||||
@@ -837,18 +835,16 @@ class ExtObjectsCommand : public Command {
|
||||
ExtObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects,
|
||||
const std::vector<amd::Memory*>& memoryObjects, cl_command_type type)
|
||||
: Command(queue, type, eventWaitList) {
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = memoryObjects.begin();
|
||||
itr != memoryObjects.end(); itr++) {
|
||||
(*itr)->retain();
|
||||
memObjects_.push_back(*itr);
|
||||
for (const auto& it : memoryObjects) {
|
||||
it->retain();
|
||||
memObjects_.push_back(it);
|
||||
}
|
||||
}
|
||||
|
||||
//! Release all resources associated with this command
|
||||
void releaseResources() {
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
||||
itr != memObjects_.end(); itr++) {
|
||||
(*itr)->release();
|
||||
for (const auto& it : memObjects_) {
|
||||
it->release();
|
||||
}
|
||||
Command::releaseResources();
|
||||
}
|
||||
@@ -954,9 +950,8 @@ class ThreadTraceMemObjectsCommand : public Command {
|
||||
//! Release all resources associated with this command
|
||||
void releaseResources() {
|
||||
threadTrace_.release();
|
||||
for (std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
||||
itr != memObjects_.end(); itr++) {
|
||||
(*itr)->release();
|
||||
for (const auto& itr : memObjects_) {
|
||||
itr->release();
|
||||
}
|
||||
Command::releaseResources();
|
||||
}
|
||||
@@ -1067,19 +1062,17 @@ class MakeBuffersResidentCommand : public Command {
|
||||
const std::vector<amd::Memory*>& memObjects,
|
||||
cl_bus_address_amd* busAddr)
|
||||
: Command(queue, type, eventWaitList), busAddresses_(busAddr) {
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = memObjects.begin(); itr != memObjects.end(); itr++) {
|
||||
(*itr)->retain();
|
||||
memObjects_.push_back(*itr);
|
||||
for (const auto& it : memObjects) {
|
||||
it->retain();
|
||||
memObjects_.push_back(it);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void submit(device::VirtualDevice& device) { device.submitMakeBuffersResident(*this); }
|
||||
|
||||
void releaseResources() {
|
||||
std::vector<amd::Memory*>::const_iterator itr;
|
||||
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
|
||||
(*itr)->release();
|
||||
for (const auto& it : memObjects_) {
|
||||
it->release();
|
||||
}
|
||||
Command::releaseResources();
|
||||
}
|
||||
|
||||
@@ -96,15 +96,14 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
|
||||
|
||||
// Process the command's event wait list.
|
||||
const Command::EventWaitList& events = command->eventWaitList();
|
||||
Command::EventWaitList::const_iterator it;
|
||||
bool dependencyFailed = false;
|
||||
|
||||
for (it = events.begin(); it != events.end(); ++it) {
|
||||
for (const auto& it : events) {
|
||||
// Only wait if the command is enqueued into another queue.
|
||||
if ((*it)->command().queue() != this) {
|
||||
if (it->command().queue() != this) {
|
||||
virtualDevice->flush(head, true);
|
||||
tail = head = NULL;
|
||||
dependencyFailed |= !(*it)->awaitCompletion();
|
||||
dependencyFailed |= !it->awaitCompletion();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,10 +62,9 @@ Context::~Context() {
|
||||
|
||||
// Dissociate OCL context with any external device
|
||||
if (info_.flags_ & (GLDeviceKhr | D3D10DeviceKhr | D3D11DeviceKhr)) {
|
||||
std::vector<Device*>::const_iterator it;
|
||||
// Loop through all devices
|
||||
for (it = devices_.begin(); it != devices_.end(); it++) {
|
||||
(*it)->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY);
|
||||
for (const auto& it : devices_) {
|
||||
it->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,10 +217,9 @@ int Context::create(const intptr_t* properties) {
|
||||
// Check if OCL context can be associated with any external device
|
||||
if (info_.flags_ & (D3D10DeviceKhr | D3D11DeviceKhr | GLDeviceKhr | D3D9DeviceKhr |
|
||||
D3D9DeviceEXKhr | D3D9DeviceVAKhr)) {
|
||||
std::vector<Device*>::const_iterator it;
|
||||
// Loop through all devices
|
||||
for (it = devices_.begin(); it != devices_.end(); it++) {
|
||||
if (!(*it)->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) {
|
||||
for (const auto& it : devices_) {
|
||||
if (!it->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) {
|
||||
result = CL_INVALID_VALUE;
|
||||
}
|
||||
}
|
||||
@@ -331,10 +329,9 @@ void Context::svmFree(void* ptr) const {
|
||||
}
|
||||
|
||||
bool Context::containsDevice(const Device* device) const {
|
||||
std::vector<Device*>::const_iterator it;
|
||||
|
||||
for (it = devices_.begin(); it != devices_.end(); ++it) {
|
||||
if (device == *it || (*it)->isAncestor(device)) {
|
||||
for (const auto& it : devices_) {
|
||||
if (device == it || it->isAncestor(device)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -342,8 +339,8 @@ bool Context::containsDevice(const Device* device) const {
|
||||
}
|
||||
|
||||
DeviceQueue* Context::defDeviceQueue(const Device& dev) const {
|
||||
std::map<const Device*, DeviceQueueInfo>::const_iterator it = deviceQueues_.find(&dev);
|
||||
if (it != deviceQueues_.end()) {
|
||||
const auto it = deviceQueues_.find(&dev);
|
||||
if (it != deviceQueues_.cend()) {
|
||||
return it->second.defDeviceQueue_;
|
||||
} else {
|
||||
return NULL;
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include "platform/agent.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace amd {
|
||||
|
||||
@@ -197,8 +197,8 @@ class Context : public RuntimeObject {
|
||||
GLFunctions* glenv_; //!< OpenGL context
|
||||
Device* customHostAllocDevice_; //!< Device responsible for host allocations
|
||||
std::vector<Device*> svmAllocDevice_; //!< Devices can support SVM allocations
|
||||
std::map<const Device*, DeviceQueueInfo> deviceQueues_; //!< Device queues mapping
|
||||
mutable Monitor ctxLock_; //!< Lock for the context access
|
||||
std::unordered_map<const Device*, DeviceQueueInfo> deviceQueues_; //!< Device queues mapping
|
||||
mutable Monitor ctxLock_; //!< Lock for the context access
|
||||
};
|
||||
|
||||
/*! @}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace device {
|
||||
class Memory;
|
||||
@@ -124,7 +125,7 @@ class Memory : public amd::RuntimeObject {
|
||||
DeviceMemory* deviceMemories_;
|
||||
|
||||
//! The device alloced state
|
||||
std::map<const Device*, AllocState> deviceAlloced_;
|
||||
std::unordered_map<const Device*, AllocState> deviceAlloced_;
|
||||
|
||||
//! Linked list of destructor callbacks.
|
||||
std::atomic<DestructorCallBackEntry*> destructorCallbacks_;
|
||||
|
||||
@@ -24,7 +24,7 @@ namespace amd {
|
||||
*/
|
||||
class PerfCounter : public RuntimeObject {
|
||||
public:
|
||||
typedef std::map<cl_perfcounter_property, ulong> Properties;
|
||||
typedef std::unordered_map<cl_perfcounter_property, ulong> Properties;
|
||||
|
||||
//! Constructor of the performance counter object
|
||||
PerfCounter(const Device& device, //!< device object
|
||||
|
||||
@@ -21,13 +21,12 @@ namespace amd {
|
||||
|
||||
Program::~Program() {
|
||||
// Destroy all device programs
|
||||
deviceprograms_t::const_iterator it, itEnd;
|
||||
for (it = devicePrograms_.begin(), itEnd = devicePrograms_.end(); it != itEnd; ++it) {
|
||||
delete it->second;
|
||||
for (const auto& it : devicePrograms_) {
|
||||
delete it.second;
|
||||
}
|
||||
|
||||
for (devicebinary_t::const_iterator IT = binary_.begin(), IE = binary_.end(); IT != IE; ++IT) {
|
||||
const binary_t& Bin = IT->second;
|
||||
for (const auto& it : binary_) {
|
||||
const binary_t& Bin = it.second;
|
||||
if (Bin.first) {
|
||||
delete[] Bin.first;
|
||||
}
|
||||
@@ -43,8 +42,8 @@ const Symbol* Program::findSymbol(const char* kernelName) const {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
symbols_t::const_iterator it = symbolTable_->find(kernelName);
|
||||
return (it == symbolTable_->end()) ? NULL : &it->second;
|
||||
const auto it = symbolTable_->find(kernelName);
|
||||
return (it == symbolTable_->cend()) ? NULL : &it->second;
|
||||
}
|
||||
|
||||
cl_int Program::addDeviceProgram(Device& device, const void* image, size_t length,
|
||||
@@ -151,8 +150,8 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt
|
||||
}
|
||||
|
||||
device::Program* Program::getDeviceProgram(const Device& device) const {
|
||||
deviceprograms_t::const_iterator it = devicePrograms_.find(&device.rootDevice());
|
||||
if (it == devicePrograms_.end()) {
|
||||
const auto it = devicePrograms_.find(&device.rootDevice());
|
||||
if (it == devicePrograms_.cend()) {
|
||||
return NULL;
|
||||
}
|
||||
return it->second;
|
||||
@@ -198,16 +197,15 @@ cl_int Program::compile(const std::vector<Device*>& devices, size_t numHeaders,
|
||||
}
|
||||
|
||||
// Compile the device programs associated with the given devices.
|
||||
std::vector<Device*>::const_iterator it;
|
||||
for (it = devices.begin(); it != devices.end(); ++it) {
|
||||
device::Program* devProgram = getDeviceProgram(**it);
|
||||
for (const auto& it : devices) {
|
||||
device::Program* devProgram = getDeviceProgram(*it);
|
||||
if (devProgram == NULL) {
|
||||
const binary_t& bin = binary(**it);
|
||||
retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
|
||||
const binary_t& bin = binary(*it);
|
||||
retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions);
|
||||
if (retval != CL_SUCCESS) {
|
||||
return retval;
|
||||
}
|
||||
devProgram = getDeviceProgram(**it);
|
||||
devProgram = getDeviceProgram(*it);
|
||||
}
|
||||
|
||||
if (devProgram->type() == device::Program::TYPE_INTERMEDIATE || language_ == SPIRV) {
|
||||
@@ -277,8 +275,7 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
}
|
||||
|
||||
// Link the device programs associated with the given devices.
|
||||
std::vector<Device*>::const_iterator it;
|
||||
for (it = devices.begin(); it != devices.end(); ++it) {
|
||||
for (const auto& it : devices) {
|
||||
// find the corresponding device program in each input program
|
||||
std::vector<device::Program*> inputDevPrograms(numInputs);
|
||||
bool found = false;
|
||||
@@ -288,8 +285,8 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
parsedOptions.oVariables->BinaryIsSpirv = true;
|
||||
}
|
||||
deviceprograms_t inputDevProgs = inputProgram.devicePrograms();
|
||||
deviceprograms_t::const_iterator findIt = inputDevProgs.find(*it);
|
||||
if (findIt == inputDevProgs.end()) {
|
||||
const auto findIt = inputDevProgs.find(it);
|
||||
if (findIt == inputDevProgs.cend()) {
|
||||
if (found) break;
|
||||
continue;
|
||||
}
|
||||
@@ -328,14 +325,14 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
device::Program* devProgram = getDeviceProgram(**it);
|
||||
device::Program* devProgram = getDeviceProgram(*it);
|
||||
if (devProgram == NULL) {
|
||||
const binary_t& bin = binary(**it);
|
||||
retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
|
||||
const binary_t& bin = binary(*it);
|
||||
retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions);
|
||||
if (retval != CL_SUCCESS) {
|
||||
return retval;
|
||||
}
|
||||
devProgram = getDeviceProgram(**it);
|
||||
devProgram = getDeviceProgram(*it);
|
||||
}
|
||||
|
||||
// We only build a Device-Program once
|
||||
@@ -359,16 +356,14 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
}
|
||||
|
||||
// Rebuild the symbol table
|
||||
deviceprograms_t::iterator sit;
|
||||
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
|
||||
const Device& device = *sit->first;
|
||||
const device::Program& program = *sit->second;
|
||||
for (const auto& sit : devicePrograms_) {
|
||||
const Device& device = *(sit.first);
|
||||
const device::Program& program = *(sit.second);
|
||||
|
||||
const device::Program::kernels_t& kernels = program.kernels();
|
||||
device::Program::kernels_t::const_iterator kit;
|
||||
for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
|
||||
const std::string& name = kit->first;
|
||||
const device::Kernel* devKernel = kit->second;
|
||||
for (const auto& it : kernels) {
|
||||
const std::string& name = it.first;
|
||||
const device::Kernel* devKernel = it.second;
|
||||
|
||||
Symbol& symbol = (*symbolTable_)[name];
|
||||
if (!symbol.setDeviceKernel(device, devKernel)) {
|
||||
@@ -379,9 +374,8 @@ cl_int Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
|
||||
// Create a string with all kernel names from the program
|
||||
if (kernelNames_.length() == 0) {
|
||||
amd::Program::symbols_t::const_iterator it;
|
||||
for (it = symbols().begin(); it != symbols().end(); ++it) {
|
||||
if (it != symbols().begin()) {
|
||||
for (auto it = symbols().cbegin(); it != symbols().cend(); ++it) {
|
||||
if (it != symbols().cbegin()) {
|
||||
kernelNames_.append(1, ';');
|
||||
}
|
||||
kernelNames_.append(it->first.c_str());
|
||||
@@ -474,20 +468,19 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
|
||||
}
|
||||
|
||||
// Build the device programs associated with the given devices.
|
||||
std::vector<Device*>::const_iterator it;
|
||||
for (it = devices.begin(); it != devices.end(); ++it) {
|
||||
device::Program* devProgram = getDeviceProgram(**it);
|
||||
for (const auto& it : devices) {
|
||||
device::Program* devProgram = getDeviceProgram(*it);
|
||||
if (devProgram == NULL) {
|
||||
const binary_t& bin = binary(**it);
|
||||
const binary_t& bin = binary(*it);
|
||||
if (sourceCode_.empty() && (bin.first == NULL)) {
|
||||
retval = false;
|
||||
continue;
|
||||
}
|
||||
retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
|
||||
retval = addDeviceProgram(*it, bin.first, bin.second, &parsedOptions);
|
||||
if (retval != CL_SUCCESS) {
|
||||
return retval;
|
||||
}
|
||||
devProgram = getDeviceProgram(**it);
|
||||
devProgram = getDeviceProgram(*it);
|
||||
}
|
||||
|
||||
parsedOptions.oVariables->AssumeAlias = true;
|
||||
@@ -518,16 +511,14 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
|
||||
}
|
||||
|
||||
// Rebuild the symbol table
|
||||
deviceprograms_t::iterator sit;
|
||||
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
|
||||
const Device& device = *sit->first;
|
||||
const device::Program& program = *sit->second;
|
||||
for (const auto& it : devicePrograms_) {
|
||||
const Device& device = *(it.first);
|
||||
const device::Program& program = *(it.second);
|
||||
|
||||
const device::Program::kernels_t& kernels = program.kernels();
|
||||
device::Program::kernels_t::const_iterator kit;
|
||||
for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
|
||||
const std::string& name = kit->first;
|
||||
const device::Kernel* devKernel = kit->second;
|
||||
for (const auto& kit : kernels) {
|
||||
const std::string& name = kit.first;
|
||||
const device::Kernel* devKernel = kit.second;
|
||||
|
||||
Symbol& symbol = (*symbolTable_)[name];
|
||||
if (!symbol.setDeviceKernel(device, devKernel)) {
|
||||
@@ -538,9 +529,8 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
|
||||
|
||||
// Create a string with all kernel names from the program
|
||||
if (kernelNames_.length() == 0) {
|
||||
amd::Program::symbols_t::const_iterator it;
|
||||
for (it = symbols().begin(); it != symbols().end(); ++it) {
|
||||
if (it != symbols().begin()) {
|
||||
for (auto it = symbols().cbegin(); it != symbols().cend(); ++it) {
|
||||
if (it != symbols().cbegin()) {
|
||||
kernelNames_.append(1, ';');
|
||||
}
|
||||
kernelNames_.append(it->first.c_str());
|
||||
@@ -555,12 +545,10 @@ cl_int Program::build(const std::vector<Device*>& devices, const char* options,
|
||||
}
|
||||
|
||||
void Program::clear() {
|
||||
deviceprograms_t::iterator sit;
|
||||
|
||||
// Destroy old programs if we have any
|
||||
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
|
||||
for (const auto& it : devicePrograms_) {
|
||||
// Destroy device program
|
||||
delete sit->second;
|
||||
delete it.second;
|
||||
}
|
||||
|
||||
devicePrograms_.clear();
|
||||
@@ -631,13 +619,13 @@ bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func, b
|
||||
|
||||
const device::Kernel* Symbol::getDeviceKernel(const Device& device, bool noAlias) const {
|
||||
const devicekernels_t* devKernels = (noAlias) ? &deviceKernels_ : &devKernelsNoOpt_;
|
||||
devicekernels_t::const_iterator itEnd = devKernels->end();
|
||||
devicekernels_t::const_iterator it = devKernels->find(&device);
|
||||
const auto itEnd = devKernels->cend();
|
||||
auto it = devKernels->find(&device);
|
||||
if (it != itEnd) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
for (it = devKernels->begin(); it != itEnd; ++it) {
|
||||
for (it = devKernels->cbegin(); it != itEnd; ++it) {
|
||||
if (it->first->isAncestor(&device)) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ namespace amd {
|
||||
//! A kernel function symbol
|
||||
class Symbol : public HeapObject {
|
||||
public:
|
||||
typedef std::map<const Device*, const device::Kernel*> devicekernels_t;
|
||||
typedef std::unordered_map<const Device*, const device::Kernel*> devicekernels_t;
|
||||
|
||||
private:
|
||||
devicekernels_t deviceKernels_; //! All device kernels objects.
|
||||
@@ -68,9 +68,9 @@ class Program : public RuntimeObject {
|
||||
public:
|
||||
typedef std::pair<uint8_t*, size_t> binary_t;
|
||||
typedef std::set<Device const*> devicelist_t;
|
||||
typedef std::map<Device const*, binary_t> devicebinary_t;
|
||||
typedef std::map<Device const*, device::Program*> deviceprograms_t;
|
||||
typedef std::map<std::string, Symbol> symbols_t;
|
||||
typedef std::unordered_map<Device const*, binary_t> devicebinary_t;
|
||||
typedef std::unordered_map<Device const*, device::Program*> deviceprograms_t;
|
||||
typedef std::unordered_map<std::string, Symbol> symbols_t;
|
||||
|
||||
enum Language {
|
||||
Binary = 0,
|
||||
|
||||
@@ -14,7 +14,7 @@ namespace amd {
|
||||
//! Abstraction layer sampler class
|
||||
class Sampler : public RuntimeObject {
|
||||
public:
|
||||
typedef std::map<Device const*, device::Sampler*> DeviceSamplers;
|
||||
typedef std::unordered_map<Device const*, device::Sampler*> DeviceSamplers;
|
||||
|
||||
//! \note the sampler states must match the compiler's defines.
|
||||
//! See amd_ocl_sys_predef.c
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include "top.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
@@ -75,7 +75,7 @@ void Flag::tearDown() {
|
||||
}
|
||||
|
||||
bool Flag::init() {
|
||||
typedef std::map<std::string, const char*> vars_type;
|
||||
typedef std::unordered_map<std::string, const char*> vars_type;
|
||||
vars_type vars;
|
||||
|
||||
#ifdef _WIN32
|
||||
@@ -116,8 +116,8 @@ bool Flag::init() {
|
||||
for (size_t i = 0; i < numFlags_; ++i) {
|
||||
Flag& flag = flags_[i];
|
||||
|
||||
vars_type::iterator it = vars.find(flag.name_);
|
||||
if (it != vars.end()) {
|
||||
const auto it = vars.find(flag.name_);
|
||||
if (it != vars.cend()) {
|
||||
flag.setValue(it->second);
|
||||
}
|
||||
}
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする