SWDEV-306939 - Fix vdi errors/warnings by CppCheck
Change-Id: I56d910f8363787f1050d5d7e8064ed553c5827fd
Этот коммит содержится в:
@@ -1093,7 +1093,7 @@ parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isL
|
||||
bool isPrefix_mno = false;
|
||||
bool isPrefix_option = false;
|
||||
|
||||
std::string name, value;
|
||||
std::string value;
|
||||
size_t sPos = pos;
|
||||
int option_ndx
|
||||
= getOptionDesc(options, sPos, isShortName, OFA_NORMAL, pos, value);
|
||||
|
||||
@@ -258,7 +258,6 @@ public:
|
||||
int r = option_ndx/32;
|
||||
int c = option_ndx%32;
|
||||
const uint32_t *p = &flags[r];
|
||||
uint32_t b = (1 << c);
|
||||
return 1 & ((*p) >> c);
|
||||
}
|
||||
|
||||
@@ -266,7 +265,7 @@ public:
|
||||
return (getFlag(option_ndx) == FLAG_SEEN);
|
||||
}
|
||||
|
||||
int getLLVMArgc() { return llvmargc; }
|
||||
int getLLVMArgc() const { return llvmargc; }
|
||||
char** getLLVMArgv() { return llvmargv; }
|
||||
void setLLVMArgs (int argc, char** argv) {
|
||||
llvmargc = argc;
|
||||
@@ -290,7 +289,7 @@ public:
|
||||
bool isCStrOptionsEqual(const char *cs1, const char* cs2) const;
|
||||
|
||||
|
||||
bool useDefaultWGS() { return UseDefaultWGS; }
|
||||
bool useDefaultWGS() const { return UseDefaultWGS; }
|
||||
void setDefaultWGS(bool V) { UseDefaultWGS = V; }
|
||||
|
||||
std::string& optionsLog() { return OptionsLog; }
|
||||
@@ -328,7 +327,7 @@ private:
|
||||
|
||||
bool UseDefaultWGS;
|
||||
|
||||
bool dumpEncrypt(DumpFlags f) {
|
||||
bool dumpEncrypt(DumpFlags f) const {
|
||||
return ((encryptCode == 0) || // return true if not encrypted
|
||||
(f & DUMP_ENCRYPT));
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ class MessageHandler {
|
||||
std::vector<Message*> messageSlots_;
|
||||
|
||||
Message* newMessage();
|
||||
Message* getMessage(uint64_t desc);
|
||||
Message* getMessage(uint64_t messageId);
|
||||
void discardMessage(Message* message);
|
||||
|
||||
public:
|
||||
|
||||
@@ -32,6 +32,7 @@ static void checkPrintf(FILE* stream, int* outCount, const char* fmt, ...) {
|
||||
va_start(args, fmt);
|
||||
int retval = vfprintf(stream, fmt, args);
|
||||
*outCount = retval < 0 ? retval : *outCount + retval;
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static int countStars(const std::string& spec) {
|
||||
|
||||
@@ -539,13 +539,12 @@ bool Device::create(const Isa &isa) {
|
||||
void Device::registerDevice() {
|
||||
assert(Runtime::singleThreaded() && "this is not thread-safe");
|
||||
|
||||
static bool defaultIsAssigned = false;
|
||||
|
||||
if (devices_ == nullptr) {
|
||||
devices_ = new std::vector<Device*>;
|
||||
}
|
||||
|
||||
if (info_.available_) {
|
||||
static bool defaultIsAssigned = false;
|
||||
if (!defaultIsAssigned && online_) {
|
||||
defaultIsAssigned = true;
|
||||
info_.type_ |= CL_DEVICE_TYPE_DEFAULT;
|
||||
@@ -796,7 +795,6 @@ bool ClBinary::setElfTarget() {
|
||||
assert(((0xFFFF8000 & Target) == 0) && "ASIC target ID >= 2^15");
|
||||
uint16_t elf_target = static_cast<uint16_t>(0x7FFF & Target);
|
||||
return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM);
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
|
||||
@@ -185,8 +185,8 @@ class Program : public amd::HeapObject {
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Link the device program.
|
||||
int32_t link(const std::vector<Program*>& inputPrograms, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
int32_t link(const std::vector<Program*>& inputPrograms, const char* origLinkOptions,
|
||||
amd::option::Options* linkOptions);
|
||||
|
||||
//! Build the device program.
|
||||
int32_t build(const std::string& sourceCode, const char* origOptions,
|
||||
@@ -436,7 +436,7 @@ class Program : public amd::HeapObject {
|
||||
char* outBinary[] = nullptr, size_t* outSize = nullptr);
|
||||
|
||||
//! Set the OCL language
|
||||
void setLanguage(const char* clStd, amd_comgr_language_t* oclver);
|
||||
void setLanguage(const char* clStd, amd_comgr_language_t* langver);
|
||||
|
||||
//! Create code object and add it into the data set
|
||||
amd_comgr_status_t addCodeObjData(const char *source,
|
||||
@@ -448,7 +448,7 @@ class Program : public amd::HeapObject {
|
||||
const std::vector<std::string>& preCompiledHeaders);
|
||||
|
||||
//! Create action for the specified language, target and options
|
||||
amd_comgr_status_t createAction(const amd_comgr_language_t oclvar,
|
||||
amd_comgr_status_t createAction(const amd_comgr_language_t oclver,
|
||||
const std::vector<std::string>& options, amd_comgr_action_info_t* action,
|
||||
bool* hasAction);
|
||||
|
||||
@@ -456,12 +456,12 @@ class Program : public amd::HeapObject {
|
||||
bool linkLLVMBitcode(const amd_comgr_data_set_t inputs,
|
||||
const std::vector<std::string>& options, const bool requiredDump,
|
||||
amd::option::Options* amdOptions, amd_comgr_data_set_t* output,
|
||||
char* binary[] = nullptr, size_t* binarySize = nullptr);
|
||||
char* binaryData[] = nullptr, size_t* binarySize = nullptr);
|
||||
|
||||
//! Create the bitcode of the compiled input dataset
|
||||
bool compileToLLVMBitcode(const amd_comgr_data_set_t inputs,
|
||||
bool compileToLLVMBitcode(const amd_comgr_data_set_t compileInputs,
|
||||
const std::vector<std::string>& options, amd::option::Options* amdOptions,
|
||||
char* binary[], size_t* binarySize);
|
||||
char* binaryData[], size_t* binarySize);
|
||||
|
||||
//! Compile and create the executable of the input dataset
|
||||
bool compileAndLinkExecutable(const amd_comgr_data_set_t inputs,
|
||||
|
||||
@@ -249,7 +249,7 @@ void GpuDebugManager::wavefrontControl(uint32_t waveAction, uint32_t waveMode, u
|
||||
}
|
||||
|
||||
void GpuDebugManager::setAddressWatch(uint32_t numWatchPoints, void** watchAddress,
|
||||
uint64_t* watchMask, uint64_t* watchMode, DebugEvent* event) {
|
||||
uint64_t* watchMask, uint64_t* watchMode, DebugEvent* pEvent) {
|
||||
size_t requiredSize = numWatchPoints * sizeof(HwDbgAddressWatch);
|
||||
|
||||
// previously allocated size is not big enough, allocate new memory
|
||||
|
||||
@@ -709,10 +709,9 @@ Device::XferBuffers::~XferBuffers() {
|
||||
}
|
||||
|
||||
bool Device::XferBuffers::create() {
|
||||
Memory* xferBuf = NULL;
|
||||
bool result = false;
|
||||
// Create a buffer object
|
||||
xferBuf = new Memory(dev(), bufSize_);
|
||||
Memory* xferBuf = new Memory(dev(), bufSize_);
|
||||
|
||||
// Try to allocate memory for the transfer buffer
|
||||
if ((NULL == xferBuf) || !xferBuf->create(type_)) {
|
||||
@@ -1150,11 +1149,11 @@ bool Device::initializeHeapResources() {
|
||||
|
||||
device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) {
|
||||
bool profiling = false;
|
||||
bool interopQueue = false;
|
||||
uint rtCUs = amd::CommandQueue::RealTimeDisabled;
|
||||
uint deviceQueueSize = 0;
|
||||
|
||||
if (queue != NULL) {
|
||||
bool interopQueue = false;
|
||||
profiling = queue->properties().test(CL_QUEUE_PROFILING_ENABLE);
|
||||
if (queue->asHostQueue() != NULL) {
|
||||
interopQueue = (0 != (queue->context().info().flags_ &
|
||||
@@ -1196,11 +1195,10 @@ typedef std::unordered_map<int, bool> requestedDevices_t;
|
||||
|
||||
//! Parses the requested list of devices to be exposed to the user.
|
||||
static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) {
|
||||
char* pch = NULL;
|
||||
int requestedDeviceCount = 0;
|
||||
const char* requestedDeviceList = GPU_DEVICE_ORDINAL;
|
||||
|
||||
pch = strtok(const_cast<char*>(requestedDeviceList), ",");
|
||||
char* pch = strtok(const_cast<char*>(requestedDeviceList), ",");
|
||||
while (pch != NULL) {
|
||||
bool deviceIdValid = true;
|
||||
int currentDeviceIndex = atoi(pch);
|
||||
@@ -1318,10 +1316,9 @@ amd::Image::Format Device::getOclFormat(const CalFormat& format) const {
|
||||
|
||||
// Create buffer without an owner (merge common code with createBuffer() ?)
|
||||
gpu::Memory* Device::createScratchBuffer(size_t size) const {
|
||||
Memory* gpuMemory = NULL;
|
||||
|
||||
// Create a memory object
|
||||
gpuMemory = new gpu::Memory(*this, size);
|
||||
Memory* gpuMemory = new gpu::Memory(*this, size);
|
||||
if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
|
||||
delete gpuMemory;
|
||||
gpuMemory = NULL;
|
||||
@@ -1501,7 +1498,6 @@ gpu::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const {
|
||||
}
|
||||
|
||||
gpu::Memory* Device::createImage(amd::Memory& owner, bool directAccess) const {
|
||||
size_t size = owner.getSize();
|
||||
amd::Image& image = *owner.asImage();
|
||||
gpu::Memory* gpuImage = NULL;
|
||||
CalFormat format = getCalFormat(image.getImageFormat());
|
||||
@@ -1645,19 +1641,16 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
|
||||
}
|
||||
|
||||
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
|
||||
size_t size = owner.getSize();
|
||||
assert((owner.asImage() != NULL) && "View supports images only");
|
||||
const amd::Image& image = *owner.asImage();
|
||||
gpu::Memory* gpuImage = NULL;
|
||||
CalFormat format = getCalFormat(image.getImageFormat());
|
||||
|
||||
gpuImage =
|
||||
gpu::Memory* gpuImage =
|
||||
new gpu::Image(*this, owner, image.getWidth(), image.getHeight(), image.getDepth(),
|
||||
format.type_, format.channelOrder_, image.getType(), image.getMipLevels());
|
||||
|
||||
// Create resource
|
||||
if (NULL != gpuImage) {
|
||||
bool result = false;
|
||||
Resource::ImageViewParams params;
|
||||
const gpu::Memory& gpuMem = static_cast<const gpu::Memory&>(parent);
|
||||
|
||||
@@ -1669,7 +1662,7 @@ device::Memory* Device::createView(amd::Memory& owner, const device::Memory& par
|
||||
params.memory_ = &gpuMem;
|
||||
|
||||
// Create memory object
|
||||
result = gpuImage->create(Resource::ImageView, &params);
|
||||
bool result = gpuImage->create(Resource::ImageView, &params);
|
||||
if (!result) {
|
||||
delete gpuImage;
|
||||
return NULL;
|
||||
@@ -2136,8 +2129,7 @@ void Device::svmFree(void* ptr) const {
|
||||
if (isFineGrainedSystem()) {
|
||||
amd::Os::alignedFree(ptr);
|
||||
} else {
|
||||
amd::Memory* svmMem = NULL;
|
||||
svmMem = amd::MemObjMap::FindMemObj(ptr);
|
||||
amd::Memory* svmMem = amd::MemObjMap::FindMemObj(ptr);
|
||||
if (NULL != svmMem) {
|
||||
svmMem->release();
|
||||
amd::MemObjMap::RemoveMemObj(ptr);
|
||||
@@ -2258,12 +2250,11 @@ int32_t Device::hwDebugManagerInit(amd::Context* context, uintptr_t messageStora
|
||||
}
|
||||
|
||||
bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
|
||||
bool result = true;
|
||||
static const bool bValidate = true;
|
||||
PerformAdapterInitialization(bValidate);
|
||||
GSLClockModeInfo clockModeInfo = {};
|
||||
clockModeInfo.clockmode = static_cast<GSLClockMode>(setClockModeInput.clock_mode);
|
||||
result = gslSetClockMode(&clockModeInfo);
|
||||
bool result = gslSetClockMode(&clockModeInfo);
|
||||
CloseInitializedAdapter(bValidate);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -819,7 +819,6 @@ void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& reg
|
||||
if (memory == NULL) {
|
||||
// for map target of svm buffer , we need use svm host ptr
|
||||
memory = new (dev().context()) amd::Buffer(dev().context(), flag, owner()->getSize());
|
||||
Memory* gpuMemory;
|
||||
|
||||
do {
|
||||
if ((memory == NULL) || !memory->create(initHostPtr, SysMem)) {
|
||||
@@ -828,7 +827,7 @@ void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& reg
|
||||
}
|
||||
memory->setCacheStatus(canBeCached);
|
||||
|
||||
gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));
|
||||
Memory* gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));
|
||||
|
||||
// Create, Map and get the base pointer for the resource
|
||||
if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
|
||||
@@ -1082,14 +1081,13 @@ void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& regi
|
||||
amd::Buffer(dev().context(), 0, cal()->width_ * height * depth * elementSize());
|
||||
memory->setVirtualDevice(owner()->getVirtualDevice());
|
||||
|
||||
Memory* gpuMemory;
|
||||
do {
|
||||
if ((memory == NULL) || !memory->create(NULL, SysMem)) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));
|
||||
Memory* gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));
|
||||
|
||||
// Create, Map and get the base pointer for the resource
|
||||
if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
|
||||
|
||||
@@ -405,7 +405,6 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
|
||||
if (posStart != std::string::npos) {
|
||||
bool printFloat = false;
|
||||
int vectorSize = 0;
|
||||
size_t length;
|
||||
size_t idPos = 0;
|
||||
|
||||
// Search for PrintfDbg specifier in the format string.
|
||||
@@ -442,7 +441,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
|
||||
|
||||
// Is it a scalar value?
|
||||
if (vectorSize == 0) {
|
||||
length = outputArgument(fmt, printFloat, info.arguments_[j], &s[i]);
|
||||
size_t length = outputArgument(fmt, printFloat, info.arguments_[j], &s[i]);
|
||||
if (0 == length) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -241,12 +241,11 @@ bool NullProgram::linkImpl(amd::option::Options* options) {
|
||||
if ((dbgSize > 0) && options->isDumpFlagSet(amd::option::DUMP_DEBUGIL)) {
|
||||
std::string debugilWithLine;
|
||||
size_t b = 1;
|
||||
size_t e;
|
||||
int linenum = 0;
|
||||
char cstr[9];
|
||||
cstr[8] = 0;
|
||||
while (b != std::string::npos) {
|
||||
e = debugILStr.find_first_of("\n", b);
|
||||
size_t e = debugILStr.find_first_of("\n", b);
|
||||
if (e != std::string::npos) {
|
||||
++e;
|
||||
}
|
||||
@@ -582,12 +581,11 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
if ((dbgSize > 0) && options->isDumpFlagSet(amd::option::DUMP_DEBUGIL)) {
|
||||
std::string debugilWithLine;
|
||||
size_t b = 1;
|
||||
size_t e;
|
||||
int linenum = 0;
|
||||
char cstr[9];
|
||||
cstr[8] = 0;
|
||||
while (b != std::string::npos) {
|
||||
e = debugILStr.find_first_of("\n", b);
|
||||
size_t e = debugILStr.find_first_of("\n", b);
|
||||
if (e != std::string::npos) {
|
||||
++e;
|
||||
}
|
||||
@@ -1978,7 +1976,6 @@ bool ORCAHSALoaderContext::GpuMemCopy(void* dst, size_t offset, const void* src,
|
||||
gpu::Memory* mem = reinterpret_cast<gpu::Memory*>(dst);
|
||||
return program_->gpuDevice().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
|
||||
true);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ORCAHSALoaderContext::GpuMemFree(void* ptr, size_t size) {
|
||||
|
||||
@@ -1149,7 +1149,7 @@ void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
|
||||
// Check if we have to wait unconditionally
|
||||
if (!waitOnBusyEngine ||
|
||||
// or we have to wait only if another engine was used on this resource
|
||||
(waitOnBusyEngine && (gpuEvent->engineId_ != gpu.engineID_))) {
|
||||
(gpuEvent->engineId_ != gpu.engineID_)) {
|
||||
gpu.waitForEvent(gpuEvent);
|
||||
}
|
||||
|
||||
|
||||
@@ -120,13 +120,13 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
|
||||
void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
if (numMemObjectsInQueue_ > 0) {
|
||||
size_t i, j;
|
||||
if (all) {
|
||||
endMemObjectsInQueue_ = numMemObjectsInQueue_;
|
||||
}
|
||||
|
||||
// If the current launch didn't start from the beginning, then move the data
|
||||
if (0 != endMemObjectsInQueue_) {
|
||||
size_t i, j;
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
@@ -1096,7 +1096,6 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) {
|
||||
vcmd.setStatus(CL_MAP_FAILURE);
|
||||
}
|
||||
} else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
|
||||
amd::Memory* bufferFromImage = NULL;
|
||||
Memory* memoryBuf = memory;
|
||||
amd::Coord3D origin(vcmd.origin()[0]);
|
||||
amd::Coord3D size(vcmd.size()[0]);
|
||||
@@ -1104,7 +1103,7 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) {
|
||||
origin.c[0] *= elemSize;
|
||||
size.c[0] *= elemSize;
|
||||
|
||||
bufferFromImage = createBufferFromImage(vcmd.memory());
|
||||
amd::Memory* bufferFromImage = createBufferFromImage(vcmd.memory());
|
||||
if (NULL == bufferFromImage) {
|
||||
LogError("We should not fail buffer creation from image_buffer!");
|
||||
} else {
|
||||
@@ -1195,7 +1194,6 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
|
||||
vcmd.setStatus(CL_OUT_OF_RESOURCES);
|
||||
}
|
||||
} else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
|
||||
amd::Memory* bufferFromImage = NULL;
|
||||
Memory* memoryBuf = memory;
|
||||
amd::Coord3D origin(writeMapInfo->origin_[0]);
|
||||
amd::Coord3D size(writeMapInfo->region_[0]);
|
||||
@@ -1203,7 +1201,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
|
||||
origin.c[0] *= elemSize;
|
||||
size.c[0] *= elemSize;
|
||||
|
||||
bufferFromImage = createBufferFromImage(vcmd.memory());
|
||||
amd::Memory* bufferFromImage = createBufferFromImage(vcmd.memory());
|
||||
if (NULL == bufferFromImage) {
|
||||
LogError("We should not fail buffer creation from image_buffer!");
|
||||
} else {
|
||||
@@ -3071,7 +3069,6 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
const amd::KernelParameterDescriptor& desc = signature.at(i);
|
||||
const HSAILKernel::Argument* arg = hsaKernel.argument(i);
|
||||
Memory* gpuMem = nullptr;
|
||||
bool readOnly = false;
|
||||
amd::Memory* mem = nullptr;
|
||||
|
||||
// Find if current argument is a buffer
|
||||
@@ -3098,6 +3095,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
memoryDependency().clear(!All);
|
||||
continue;
|
||||
} else if (gpuMem != nullptr) {
|
||||
bool readOnly = false;
|
||||
// Check image
|
||||
readOnly = (desc.accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
// Check buffer
|
||||
@@ -3156,8 +3154,7 @@ void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispat
|
||||
kernelInfo.scratchBufferSizeInBytes = scratchBuf->size();
|
||||
|
||||
// Get the address of the scratch buffer and its size for CPU access
|
||||
address scratchRingAddr = NULL;
|
||||
scratchRingAddr = static_cast<address>(scratchBuf->map(NULL, 0));
|
||||
address scratchRingAddr = static_cast<address>(scratchBuf->map(NULL, 0));
|
||||
dbgManager->setScratchRing(scratchRingAddr, scratchBuf->size());
|
||||
scratchBuf->unmap(NULL);
|
||||
} else {
|
||||
|
||||
@@ -363,11 +363,11 @@ CALGSLContext::copyPartial(GpuEvent& event,
|
||||
uint32 mode = GSL_SYNCUPLOAD_IGNORE_ELEMENTSIZE;
|
||||
EngineType engineId = MainEngine;
|
||||
assert(m_cs != 0);
|
||||
CopyType type = USE_NONE;
|
||||
uint64 linearBytePitch = 0;
|
||||
intp bpp = 0;
|
||||
|
||||
type = dev()->GetCopyType(srcMem, destMem, srcOffset, destOffset, m_allowDMA, flags, size[0], enableRectCopy);
|
||||
CopyType type = dev()->GetCopyType(srcMem, destMem, srcOffset, destOffset, m_allowDMA,
|
||||
flags, size[0], enableRectCopy);
|
||||
|
||||
if(type == USE_NONE)
|
||||
{
|
||||
|
||||
@@ -75,7 +75,7 @@ public:
|
||||
}
|
||||
|
||||
bool copyPartial(GpuEvent& event, gslMemObject srcMem, size_t* srcOffset,
|
||||
gslMemObject destMem, size_t* destOffset, size_t* size, CALmemcopyflags flags, bool enableCopyRect, uint32 bytesPerElement);
|
||||
gslMemObject destMem, size_t* destOffset, size_t* size, CALmemcopyflags flags, bool enableRectCopy, uint32 bytesPerElement);
|
||||
|
||||
void setSamplerParameter(uint32 sampler, gslTexParameterPname param, CALvoid* vals);
|
||||
|
||||
|
||||
@@ -265,7 +265,7 @@ static uint32 parse4TupleValues(const char* element, uint32*& values)
|
||||
return numTuples;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
CALGSLDevice::parsePowerParam(const char* element, gslRuntimeConfigUint32Value& pwrCount, gslRuntimeConfigUint32pValue& pwrPointer)
|
||||
{
|
||||
uint32 count = 0;
|
||||
@@ -843,7 +843,7 @@ Wait(gsl::gsCtx* cs, gslQueryTarget target, gslQueryObject object)
|
||||
assert(param == 1);
|
||||
}
|
||||
|
||||
bool
|
||||
static bool
|
||||
CALGSLDevice::ResolveAperture(const gslMemObjectAttribTiling tiling) const
|
||||
{
|
||||
// Don't ask for aperture if the tiling is linear.
|
||||
@@ -1163,7 +1163,7 @@ CALGSLDevice::resMapLocal(size_t& pitch,
|
||||
else
|
||||
{
|
||||
// Allocate map structure for the unmap call
|
||||
GSLDeviceMemMap* memMap = (GSLDeviceMemMap*)malloc(sizeof(GSLDeviceMemMap));
|
||||
GSLDeviceMemMap* memMap = static_cast<GSLDeviceMemMap*>(malloc(sizeof(GSLDeviceMemMap)));
|
||||
|
||||
if (memMap == NULL)
|
||||
{
|
||||
@@ -1270,7 +1270,7 @@ CALGSLDevice::resUnmapLocal(gslMemObject mem)
|
||||
return;
|
||||
}
|
||||
|
||||
GSLDeviceMemMap* memMap = (GSLDeviceMemMap*)iter->second;
|
||||
GSLDeviceMemMap* memMap = static_cast<GSLDeviceMemMap*>(iter->second);
|
||||
m_hack.erase(iter);
|
||||
|
||||
memMap->mem->unmap(m_cs);
|
||||
@@ -1500,7 +1500,7 @@ CALGSLDevice::calcScratchBufferSize(uint32 regNum) const
|
||||
return scratchBufferSizes[target];
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
CALGSLDevice::convertInputChannelOrder(intp*channelOrder) const
|
||||
{
|
||||
// set default to indicate that we don't want to override the channel order.
|
||||
@@ -1659,4 +1659,4 @@ CALGSLDevice::gslSetClockMode(GSLClockModeInfo * clockModeInfo)
|
||||
result = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,12 +88,12 @@ public:
|
||||
void close();
|
||||
|
||||
gslMemObject resAlloc(const CALresourceDesc* desc) const;
|
||||
void* resMapLocal(size_t& pitch, gslMemObject res, gslMapAccessType flags);
|
||||
void resUnmapLocal(gslMemObject res);
|
||||
void* resMapLocal(size_t& pitch, gslMemObject mem, gslMapAccessType flags);
|
||||
void resUnmapLocal(gslMemObject mem);
|
||||
|
||||
void resFree(gslMemObject mem) const;
|
||||
void* resMapRemote(size_t& pitch, gslMemObject res, gslMapAccessType flags) const;
|
||||
void resUnmapRemote(gslMemObject res) const;
|
||||
void* resMapRemote(size_t& pitch, gslMemObject mem, gslMapAccessType flags) const;
|
||||
void resUnmapRemote(gslMemObject mem) const;
|
||||
|
||||
gslMemObject resGetHeap(size_t size) const;
|
||||
gslMemObject resAllocView(gslMemObject res, gslResource3D size,
|
||||
|
||||
@@ -69,7 +69,7 @@ CALGSLDevice::associateD3D9Device(void* d3d9Device)
|
||||
#else // !ATI_OS_WIN
|
||||
|
||||
bool
|
||||
CALGSLDevice::associateD3D9Device(void* d3dDevice)
|
||||
CALGSLDevice::associateD3D9Device(void* d3d9Device)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -182,7 +182,7 @@ IniSection::IniSection()
|
||||
IniSection::IniSection(const IniSection& s)
|
||||
{
|
||||
name = s.name;
|
||||
for(EntryDBIterator iter = s.entryDB.begin() ; iter != s.entryDB.end(); iter++)
|
||||
for(EntryDBIterator iter = s.entryDB.begin() ; iter != s.entryDB.end(); ++iter)
|
||||
{
|
||||
entryDB[iter->first] = iter->second;
|
||||
}
|
||||
@@ -196,7 +196,7 @@ IniSection::IniSection(cmString n)
|
||||
|
||||
IniSection::~IniSection()
|
||||
{
|
||||
for(EntryDBIterator iter = entryDB.begin() ; iter != entryDB.end(); iter++)
|
||||
for(EntryDBIterator iter = entryDB.begin() ; iter != entryDB.end(); ++iter)
|
||||
{
|
||||
delete iter->second;
|
||||
}
|
||||
@@ -267,7 +267,7 @@ IniFile::~IniFile()
|
||||
sectionDB.clear();
|
||||
}
|
||||
|
||||
const cmString IniSection::getName()
|
||||
const cmString IniSection::getName () const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
@@ -376,7 +376,7 @@ void IniValueFloat::printAST()
|
||||
|
||||
void IniSection::printAST()
|
||||
{
|
||||
for(EntryDBIterator iter = entryDB.begin() ; iter != entryDB.end(); iter++)
|
||||
for(EntryDBIterator iter = entryDB.begin() ; iter != entryDB.end(); ++iter)
|
||||
{
|
||||
cmString name = iter->first;
|
||||
IniValue *v = iter->second;
|
||||
@@ -389,7 +389,7 @@ void IniSection::printAST()
|
||||
|
||||
void IniFile::printAST()
|
||||
{
|
||||
for(SectionDBIterator iter = sectionDB.begin() ; iter != sectionDB.end(); iter++)
|
||||
for(SectionDBIterator iter = sectionDB.begin() ; iter != sectionDB.end(); ++iter)
|
||||
{
|
||||
IniSection* s = iter->second;
|
||||
std::cerr << "[" << s->getName().c_str() << "]\n";
|
||||
|
||||
@@ -158,7 +158,7 @@ public:
|
||||
bool is_float;
|
||||
};
|
||||
|
||||
int cmp_nocase(const std::string s1, const std::string s2)
|
||||
int cmp_nocase(const std::string& s1, const std::string& s2)
|
||||
{
|
||||
std::string::const_iterator p1 = s1.begin();
|
||||
std::string::const_iterator p2 = s2.begin();
|
||||
@@ -175,7 +175,7 @@ int cmp_nocase(const std::string s1, const std::string s2)
|
||||
return static_cast<int>(s2.size()-s1.size());
|
||||
}
|
||||
|
||||
IniValue* IniFileParser::parseValue(std::string value ) {
|
||||
IniValue* IniFileParser::parseValue(std::string& value ) {
|
||||
std::string trimmed = trim(value);
|
||||
|
||||
std::stringstream ss(trimmed);
|
||||
|
||||
@@ -144,10 +144,10 @@ void HwDebugManager::assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem) {
|
||||
paramMemory_[paramIdx] = mem;
|
||||
}
|
||||
|
||||
void HwDebugManager::installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* trapHandler,
|
||||
amd::Memory* trapBuffer) {
|
||||
rtTrapInfo_[trapType << 2] = trapHandler;
|
||||
rtTrapInfo_[(trapType << 2) + 1] = trapBuffer;
|
||||
void HwDebugManager::installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler,
|
||||
amd::Memory* pTrapBuffer) {
|
||||
rtTrapInfo_[trapType << 2] = pTrapHandler;
|
||||
rtTrapInfo_[(trapType << 2) + 1] = pTrapBuffer;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -105,8 +105,8 @@ class HwDebugManager {
|
||||
virtual ~HwDebugManager();
|
||||
|
||||
//! Setup the call back function pointer
|
||||
void setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFn,
|
||||
cl_PostDispatchCallBackFunctionAMD postDispatchFn);
|
||||
void setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFuncion,
|
||||
cl_PostDispatchCallBackFunctionAMD postDispatchFunction);
|
||||
|
||||
//! Setup the call back argument pointers
|
||||
void setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs);
|
||||
@@ -146,10 +146,10 @@ class HwDebugManager {
|
||||
device::Memory* runtimeTMA() const { return runtimeTMA_; }
|
||||
|
||||
//! Set exception policy
|
||||
void setExceptionPolicy(void* policy);
|
||||
void setExceptionPolicy(void* exceptionPolicy);
|
||||
|
||||
//! Get exception policy
|
||||
void getExceptionPolicy(void* policy) const;
|
||||
void getExceptionPolicy(void* exceptionPolicy) const;
|
||||
|
||||
//! Set the kernel execution mode
|
||||
void setKernelExecutionMode(void* mode);
|
||||
|
||||
@@ -293,7 +293,7 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
|
||||
} else {
|
||||
chunkSize = std::min(amd::alignUp(xferSize / 4, 256), gpu().xferWrite().MaxSize());
|
||||
chunkSize = std::max(chunkSize, 64 * Ki);
|
||||
bool flushDMA = true;
|
||||
flushDMA = true;
|
||||
}
|
||||
size_t srcOffset = 0;
|
||||
uint32_t flags = Resource::NoWait;
|
||||
|
||||
@@ -2402,12 +2402,11 @@ int32_t Device::hwDebugManagerInit(amd::Context* context, uintptr_t messageStora
|
||||
|
||||
bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
|
||||
bool result = false;
|
||||
Pal::SetClockModeInput setClockMode = {};
|
||||
Pal::DeviceClockMode palClockMode =
|
||||
static_cast<Pal::DeviceClockMode>(setClockModeInput.clock_mode);
|
||||
setClockMode.clockMode = palClockMode;
|
||||
result = (Pal::Result::Success ==
|
||||
bool result = (Pal::Result::Success ==
|
||||
(iDev()->SetClockMode(setClockMode,
|
||||
reinterpret_cast<Pal::SetClockModeOutput*>(pSetClockModeOutput))))
|
||||
? true
|
||||
|
||||
@@ -41,7 +41,6 @@ bool Device::associateD3D10Device(void* d3d10Device) { return false; }
|
||||
namespace pal {
|
||||
|
||||
static bool queryD3D10DeviceGPUMask(ID3D10Device* pd3d10Device, UINT* pd3d10DeviceGPUMask) {
|
||||
HMODULE hDLL = nullptr;
|
||||
IAmdDxExt* pExt = nullptr;
|
||||
IAmdDxExtCLInterop* pCLExt = nullptr;
|
||||
PFNAmdDxExtCreate AmdDxExtCreate;
|
||||
@@ -54,7 +53,7 @@ static bool queryD3D10DeviceGPUMask(ID3D10Device* pd3d10Device, UINT* pd3d10Devi
|
||||
static constexpr CHAR dxxModuleName[13] = "atidxx32.dll";
|
||||
#endif
|
||||
|
||||
hDLL = GetModuleHandle(dxxModuleName);
|
||||
HMODULE hDLL = GetModuleHandle(dxxModuleName);
|
||||
|
||||
if (hDLL == nullptr) {
|
||||
hr = E_FAIL;
|
||||
|
||||
@@ -41,7 +41,6 @@ bool Device::associateD3D11Device(void* d3d11Device) { return false; }
|
||||
namespace pal {
|
||||
|
||||
static bool queryD3D11DeviceGPUMask(ID3D11Device* pd3d11Device, UINT* pd3d11DeviceGPUMask) {
|
||||
HMODULE hDLL = nullptr;
|
||||
IAmdDxExt* pExt = nullptr;
|
||||
IAmdDxExtCLInterop* pCLExt = nullptr;
|
||||
PFNAmdDxExtCreate11 AmdDxExtCreate11;
|
||||
@@ -54,7 +53,7 @@ static bool queryD3D11DeviceGPUMask(ID3D11Device* pd3d11Device, UINT* pd3d11Devi
|
||||
static constexpr CHAR dxxModuleName[13] = "atidxx32.dll";
|
||||
#endif
|
||||
|
||||
hDLL = GetModuleHandle(dxxModuleName);
|
||||
HMODULE hDLL = GetModuleHandle(dxxModuleName);
|
||||
|
||||
if (hDLL == nullptr) {
|
||||
hr = E_FAIL;
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
#if defined(ATI_OS_LINUX)
|
||||
namespace pal {
|
||||
bool Device::associateD3D9Device(void* d3dDevice) { return false; }
|
||||
bool Device::associateD3D9Device(void* d3d9Device) { return false; }
|
||||
} // namespace pal
|
||||
#else // !ATI_OS_LINUX
|
||||
|
||||
|
||||
@@ -131,12 +131,11 @@ bool HSAILKernel::init() {
|
||||
workgroupGroupSegmentByteSize_ = workGroupInfo_.usedLDSSize_;
|
||||
kernargSegmentByteSize_ = akc->kernarg_segment_byte_size;
|
||||
|
||||
acl_error error = ACL_SUCCESS;
|
||||
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfArgList);
|
||||
acl_error error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(),
|
||||
RT_ARGUMENT_ARRAY, openClKernelName.c_str(),
|
||||
nullptr, &sizeOfArgList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -50,4 +50,4 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
@@ -624,13 +624,13 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
|
||||
void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
if (numMemObjectsInQueue_ > 0) {
|
||||
size_t i, j;
|
||||
if (all) {
|
||||
endMemObjectsInQueue_ = numMemObjectsInQueue_;
|
||||
}
|
||||
|
||||
// If the current launch didn't start from the beginning, then move the data
|
||||
if (0 != endMemObjectsInQueue_) {
|
||||
size_t i, j;
|
||||
// Preserve all objects from the current kernel
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
@@ -2553,8 +2553,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
Pal::DispatchAqlParams dispatchParam = {};
|
||||
dispatchParam.pAqlPacket = aqlPkt;
|
||||
if (hsaKernel.workGroupInfo()->scratchRegs_ > 0) {
|
||||
const Device::ScratchBuffer* scratch = nullptr;
|
||||
scratch = dev().scratch(hwRing());
|
||||
const Device::ScratchBuffer* scratch = dev().scratch(hwRing());
|
||||
dispatchParam.scratchAddr = scratch->memObj_->vmAddress();
|
||||
dispatchParam.scratchSize = scratch->size_;
|
||||
dispatchParam.scratchOffset = scratch->offset_;
|
||||
@@ -3640,8 +3639,7 @@ void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispat
|
||||
kernelInfo.scratchBufferSizeInBytes = scratchBuf->size();
|
||||
|
||||
// Get the address of the scratch buffer and its size for CPU access
|
||||
address scratchRingAddr = nullptr;
|
||||
scratchRingAddr = static_cast<address>(scratchBuf->map(nullptr, 0));
|
||||
address scratchRingAddr = static_cast<address>(scratchBuf->map(nullptr, 0));
|
||||
dbgManager->setScratchRing(scratchRingAddr, scratchBuf->size());
|
||||
scratchBuf->unmap(nullptr);
|
||||
} else {
|
||||
|
||||
@@ -34,7 +34,7 @@ void* ProDevice::lib_drm_handle_ = nullptr;
|
||||
bool ProDevice::initialized_ = false;
|
||||
drm::Funcs ProDevice::funcs_;
|
||||
|
||||
IProDevice* IProDevice::Init(uint32_t bus, uint32_t dev, uint32_t func)
|
||||
IProDevice* IProDevice::Init(uint32_t bus, uint32_t device, uint32_t func)
|
||||
{
|
||||
// Make sure DRM lib is initialized
|
||||
if (!ProDevice::DrmInit()) {
|
||||
|
||||
@@ -902,9 +902,6 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
static const bool CopyRect = false;
|
||||
// Flush DMA for ASYNC copy
|
||||
static const bool FlushDMA = true;
|
||||
amd::Image* dstImage = static_cast<amd::Image*>(dstMemory.owner());
|
||||
size_t imgRowPitch = size[0] * dstImage->getImageFormat().getElementSize();
|
||||
size_t imgSlicePitch = imgRowPitch * size[1];
|
||||
@@ -1123,9 +1120,6 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
static const bool CopyRect = false;
|
||||
// Flush DMA for ASYNC copy
|
||||
static const bool FlushDMA = true;
|
||||
amd::Image* srcImage = static_cast<amd::Image*>(srcMemory.owner());
|
||||
size_t imgRowPitch = size[0] * srcImage->getImageFormat().getElementSize();
|
||||
size_t imgSlicePitch = imgRowPitch * size[1];
|
||||
@@ -1609,9 +1603,9 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
|
||||
|
||||
const static uint CopyRectAlignment[3] = {16, 4, 1};
|
||||
|
||||
bool aligned;
|
||||
uint i;
|
||||
for (i = 0; i < sizeof(CopyRectAlignment) / sizeof(uint); i++) {
|
||||
bool aligned;
|
||||
// Check source alignments
|
||||
aligned = ((srcRectIn.rowPitch_ % CopyRectAlignment[i]) == 0);
|
||||
aligned &= ((srcRectIn.slicePitch_ % CopyRectAlignment[i]) == 0);
|
||||
@@ -2077,9 +2071,9 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
|
||||
const static uint CopyBuffAlignment[3] = {1 /*16*/, 1 /*4*/, 1};
|
||||
amd::Coord3D size(sizeIn[0], sizeIn[1], sizeIn[2]);
|
||||
|
||||
bool aligned = false;
|
||||
uint i;
|
||||
for (i = 0; i < sizeof(CopyBuffAlignment) / sizeof(uint); i++) {
|
||||
bool aligned = false;
|
||||
// Check source alignments
|
||||
aligned = ((srcOrigin[0] % CopyBuffAlignment[i]) == 0);
|
||||
// Check destination alignments
|
||||
|
||||
@@ -525,9 +525,6 @@ PerfCounter::~PerfCounter() {
|
||||
|
||||
bool PerfCounterProfile::initialize() {
|
||||
|
||||
uint32_t cmd_buf_size;
|
||||
uint32_t out_buf_size;
|
||||
|
||||
// save the current command and output buffer information
|
||||
hsa_ven_amd_aqlprofile_descriptor_t cmd_buf = profile_.command_buffer;
|
||||
hsa_ven_amd_aqlprofile_descriptor_t out_buf = profile_.output_buffer;
|
||||
|
||||
@@ -434,10 +434,11 @@ bool Device::init() {
|
||||
HIP_VISIBLE_DEVICES : CUDA_VISIBLE_DEVICES)
|
||||
: GPU_DEVICE_ORDINAL;
|
||||
if (ordinals[0] != '\0') {
|
||||
size_t end, pos = 0;
|
||||
size_t pos = 0;
|
||||
std::vector<hsa_agent_t> valid_agents;
|
||||
std::set<size_t> valid_indexes;
|
||||
do {
|
||||
size_t end;
|
||||
bool deviceIdValid = true;
|
||||
end = ordinals.find_first_of(',', pos);
|
||||
if (end == std::string::npos) {
|
||||
@@ -2102,13 +2103,13 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* me
|
||||
bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset,
|
||||
unsigned int flags, void** dev_ptr) const {
|
||||
amd::Memory* amd_mem_obj = nullptr;
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
void* orig_dev_ptr = nullptr;
|
||||
|
||||
// Retrieve the devPtr from the handle
|
||||
hsa_status = hsa_amd_ipc_memory_attach(reinterpret_cast<const hsa_amd_ipc_memory_t*>(handle),
|
||||
mem_size, (1 + p2p_agents_.size()), p2p_agents_list_,
|
||||
&orig_dev_ptr);
|
||||
hsa_status_t hsa_status =
|
||||
hsa_amd_ipc_memory_attach(reinterpret_cast<const hsa_amd_ipc_memory_t*>(handle),
|
||||
mem_size, (1 + p2p_agents_.size()), p2p_agents_list_,
|
||||
&orig_dev_ptr);
|
||||
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("HSA failed to attach IPC memory with status: %d \n", hsa_status);
|
||||
@@ -2537,8 +2538,7 @@ bool Device::SvmAllocInit(void* memory, size_t size) const {
|
||||
|
||||
// ================================================================================================
|
||||
void Device::svmFree(void* ptr) const {
|
||||
amd::Memory* svmMem = nullptr;
|
||||
svmMem = amd::MemObjMap::FindMemObj(ptr);
|
||||
amd::Memory* svmMem = amd::MemObjMap::FindMemObj(ptr);
|
||||
if (nullptr != svmMem) {
|
||||
amd::MemObjMap::RemoveMemObj(svmMem->getSvmPtr());
|
||||
svmMem->release();
|
||||
@@ -2684,8 +2684,7 @@ hsa_queue_t* Device::acquireQueue(uint32_t queue_size_hint, bool coop_queue,
|
||||
|
||||
// default priority is normal so no need to set it again
|
||||
if (queue_priority != HSA_AMD_QUEUE_PRIORITY_NORMAL) {
|
||||
hsa_status_t st = HSA_STATUS_SUCCESS;
|
||||
st = hsa_amd_queue_set_priority(queue, queue_priority);
|
||||
hsa_status_t st = hsa_amd_queue_set_priority(queue, queue_priority);
|
||||
if (st != HSA_STATUS_SUCCESS) {
|
||||
DevLogError("Device::acquireQueue: hsa_amd_queue_set_priority failed!");
|
||||
hsa_queue_destroy(queue);
|
||||
@@ -2733,8 +2732,7 @@ hsa_queue_t* Device::acquireQueue(uint32_t queue_size_hint, bool coop_queue,
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "setting CU mask 0x%s for hardware queue %p",
|
||||
ss.str().c_str(), queue);
|
||||
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
status = hsa_amd_queue_cu_set_mask(queue, mask.size() * 32, mask.data());
|
||||
hsa_status_t status = hsa_amd_queue_cu_set_mask(queue, mask.size() * 32, mask.data());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
DevLogError("Device::acquireQueue: hsa_amd_queue_cu_set_mask failed!");
|
||||
hsa_queue_destroy(queue);
|
||||
|
||||
@@ -1381,7 +1381,6 @@ bool Image::ValidateMemory() {
|
||||
if (dev().settings().imageBufferWar_ && linearLayout && (owner() != nullptr) &&
|
||||
((owner()->asImage()->getWidth() * owner()->asImage()->getImageFormat().getElementSize()) <
|
||||
owner()->asImage()->getRowPitch())) {
|
||||
constexpr bool ForceLinear = true;
|
||||
amd::Image* img = owner()->asImage();
|
||||
// Create a native image without pitch for validation
|
||||
copyImageBuffer_ =
|
||||
|
||||
@@ -295,7 +295,6 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
|
||||
if (posStart != std::string::npos) {
|
||||
bool printFloat = false;
|
||||
int vectorSize = 0;
|
||||
size_t length;
|
||||
size_t idPos = 0;
|
||||
|
||||
// Search for PrintfDbg specifier in the format string.
|
||||
@@ -332,6 +331,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
|
||||
|
||||
// Is it a scalar value?
|
||||
if (vectorSize == 0) {
|
||||
size_t length;
|
||||
length = outputArgument(fmt, printFloat, info.arguments_[j], &s[i]);
|
||||
if (0 == length) {
|
||||
return;
|
||||
|
||||
@@ -101,10 +101,10 @@ bool Program::defineGlobalVar(const char* name, void* dptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
hsa_agent_t hsa_device = rocDevice().getBackendDevice();
|
||||
|
||||
status = hsa_executable_agent_global_variable_define(hsaExecutable_, hsa_device, name, dptr);
|
||||
hsa_status_t status = hsa_executable_agent_global_variable_define(hsaExecutable_,
|
||||
hsa_device, name, dptr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog_ += "Error: Could not define global variable : ";
|
||||
buildLog_ += hsa_strerror(status);
|
||||
|
||||
@@ -61,8 +61,8 @@ class Program : public device::Program {
|
||||
return hsaExecutable_;
|
||||
}
|
||||
|
||||
virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** dptr,
|
||||
size_t* bytes, const char* globalName) const;
|
||||
virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
|
||||
size_t* bytes, const char* global_name) const;
|
||||
|
||||
protected:
|
||||
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
|
||||
|
||||
@@ -54,8 +54,7 @@ Settings::Settings() {
|
||||
// operates or is programmed to be in Coherent mode.
|
||||
// Users can turn it off for hardware that does not
|
||||
// support this feature naturally
|
||||
char* nonCoherentMode = nullptr;
|
||||
nonCoherentMode = getenv("OPENCL_USE_NC_MEMORY_POLICY");
|
||||
char* nonCoherentMode = getenv("OPENCL_USE_NC_MEMORY_POLICY");
|
||||
enableNCMode_ = (nonCoherentMode) ? true : false;
|
||||
|
||||
// Disable image DMA by default (ROCM runtime doesn't support it)
|
||||
|
||||
@@ -283,7 +283,6 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
// ================================================================================================
|
||||
void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
if (numMemObjectsInQueue_ > 0) {
|
||||
size_t i, j;
|
||||
if (all) {
|
||||
endMemObjectsInQueue_ = numMemObjectsInQueue_;
|
||||
}
|
||||
@@ -291,6 +290,7 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
// If the current launch didn't start from the beginning, then move the data
|
||||
if (0 != endMemObjectsInQueue_) {
|
||||
// Preserve all objects from the current kernel
|
||||
size_t i, j;
|
||||
for (i = 0, j = endMemObjectsInQueue_; j < numMemObjectsInQueue_; i++, j++) {
|
||||
memObjectsInQueue_[i].start_ = memObjectsInQueue_[j].start_;
|
||||
memObjectsInQueue_[i].end_ = memObjectsInQueue_[j].end_;
|
||||
|
||||
@@ -395,7 +395,7 @@ bool Elf::setupShdr (
|
||||
ElfSections id,
|
||||
section* section,
|
||||
Elf64_Word shlink
|
||||
)
|
||||
) const
|
||||
{
|
||||
section->set_addr_align(ElfSecDesc[id].d_align);
|
||||
section->set_type(ElfSecDesc[id].sh_type);
|
||||
@@ -469,7 +469,7 @@ bool Elf::setTarget(uint16_t machine, ElfPlatform platform)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Elf::getType(uint16_t &type) {
|
||||
bool Elf::getType(uint16_t &type) const {
|
||||
type = _elfio.get_type();
|
||||
return true;
|
||||
}
|
||||
@@ -479,7 +479,7 @@ bool Elf::setType(uint16_t type) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Elf::getFlags(uint32_t &flag) {
|
||||
bool Elf::getFlags(uint32_t &flag) const {
|
||||
flag = _elfio.get_flags();
|
||||
return true;
|
||||
}
|
||||
@@ -523,7 +523,7 @@ unsigned int Elf::getSegmentNum() const {
|
||||
return _elfio.segments.size();
|
||||
}
|
||||
|
||||
bool Elf::getSegment(const unsigned int index, segment*& seg) {
|
||||
bool Elf::getSegment(const unsigned int index, segment*& seg) const {
|
||||
bool ret = false;
|
||||
if (index < _elfio.segments.size()) {
|
||||
seg = _elfio.segments[index];
|
||||
@@ -946,7 +946,7 @@ bool Elf::dumpImage(char** buff, size_t* len)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Elf::dumpImage(std::istream &is, char **buff, size_t *len) {
|
||||
bool Elf::dumpImage(std::istream &is, char **buff, size_t *len) const {
|
||||
if (buff == nullptr || len == nullptr) {
|
||||
return false;
|
||||
}
|
||||
@@ -1068,7 +1068,7 @@ void* Elf::calloc(size_t sz)
|
||||
void
|
||||
Elf::elfMemoryRelease()
|
||||
{
|
||||
for(EMemory::iterator it = _elfMemory.begin(); it != _elfMemory.end(); it++) {
|
||||
for(EMemory::iterator it = _elfMemory.begin(); it != _elfMemory.end(); ++it) {
|
||||
free(it->first);
|
||||
}
|
||||
_elfMemory.clear();
|
||||
|
||||
@@ -253,7 +253,7 @@ public:
|
||||
* if dumpImage() succeeds.
|
||||
*/
|
||||
bool dumpImage(char** buff, size_t* len);
|
||||
bool dumpImage(std::istream& is, char** buff, size_t* len);
|
||||
bool dumpImage(std::istream& is, char** buff, size_t* len) const;
|
||||
|
||||
/*
|
||||
* If the session doesn't exist, create a new ELF section with data <d_buf, d_size>;
|
||||
@@ -320,11 +320,11 @@ public:
|
||||
bool setTarget(uint16_t machine, ElfPlatform platform);
|
||||
|
||||
/* Get/set elf type field from header */
|
||||
bool getType(uint16_t &type);
|
||||
bool getType(uint16_t &type) const;
|
||||
bool setType(uint16_t type);
|
||||
|
||||
/* Get/set elf flag field from header */
|
||||
bool getFlags(uint32_t &flag);
|
||||
bool getFlags(uint32_t &flag) const;
|
||||
bool setFlags(uint32_t flag);
|
||||
|
||||
/*
|
||||
@@ -333,9 +333,9 @@ public:
|
||||
*/
|
||||
bool Clear();
|
||||
|
||||
unsigned char getELFClass() { return _eclass; }
|
||||
unsigned char getELFClass() const { return _eclass; }
|
||||
|
||||
bool isSuccessful() { return _successful; }
|
||||
bool isSuccessful() const { return _successful; }
|
||||
|
||||
bool isHsaCo() const { return _elfio.get_machine() == EM_AMDGPU; }
|
||||
|
||||
@@ -343,7 +343,7 @@ public:
|
||||
unsigned int getSegmentNum() const;
|
||||
|
||||
/* Return segment at index */
|
||||
bool getSegment(const unsigned int index, segment*& seg);
|
||||
bool getSegment(const unsigned int index, segment*& seg) const;
|
||||
|
||||
/* Return size of elf file */
|
||||
static uint64_t getElfSize(const void *emi);
|
||||
@@ -369,7 +369,7 @@ private:
|
||||
ElfSections id,
|
||||
section* section,
|
||||
Elf64_Word shlink = 0
|
||||
);
|
||||
) const ;
|
||||
|
||||
/*
|
||||
* Create a new data into an existing section.
|
||||
|
||||
@@ -862,9 +862,7 @@ bool Os::MemoryMapFileDesc(FileDesc fdesc, size_t fsize, size_t foffset, const v
|
||||
return false;
|
||||
}
|
||||
|
||||
HANDLE map_handle = INVALID_HANDLE_VALUE;
|
||||
|
||||
map_handle = CreateFileMappingA(fdesc, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
HANDLE map_handle = CreateFileMappingA(fdesc, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (map_handle == INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(map_handle);
|
||||
return false;
|
||||
@@ -888,16 +886,13 @@ bool Os::MemoryMapFile(const char* fname, const void** mmap_ptr, size_t* mmap_si
|
||||
return false;
|
||||
}
|
||||
|
||||
HANDLE map_handle = INVALID_HANDLE_VALUE;
|
||||
HANDLE file_handle = INVALID_HANDLE_VALUE;
|
||||
|
||||
file_handle = CreateFileA(fname, GENERIC_READ, 0, NULL, OPEN_EXISTING,
|
||||
FILE_ATTRIBUTE_READONLY, NULL);
|
||||
HANDLE file_handle = CreateFileA(fname, GENERIC_READ, 0, NULL, OPEN_EXISTING,
|
||||
FILE_ATTRIBUTE_READONLY, NULL);
|
||||
if (file_handle == INVALID_HANDLE_VALUE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
map_handle = CreateFileMappingA(file_handle, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
HANDLE map_handle = CreateFileMappingA(file_handle, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (map_handle == INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(file_handle);
|
||||
return false;
|
||||
|
||||
@@ -276,7 +276,6 @@ DeviceQueue::~DeviceQueue() {
|
||||
}
|
||||
|
||||
bool DeviceQueue::create() {
|
||||
static const bool InteropQueue = true;
|
||||
const bool defaultDeviceQueue = properties().test(CL_QUEUE_ON_DEVICE_DEFAULT);
|
||||
bool result = false;
|
||||
|
||||
|
||||
@@ -1178,10 +1178,9 @@ bool Image::Format::isSupported(const Context& context, cl_mem_object_type image
|
||||
// ================================================================================================
|
||||
Image* Image::createView(const Context& context, const Format& format, device::VirtualDevice* vDev,
|
||||
uint baseMipLevel, cl_mem_flags flags) {
|
||||
Image* view = nullptr;
|
||||
|
||||
// Find the image dimensions and create a corresponding object
|
||||
view = new (context) Image(format, *this, baseMipLevel, flags);
|
||||
Image* view = new (context) Image(format, *this, baseMipLevel, flags);
|
||||
|
||||
if (view != nullptr) {
|
||||
// Set GPU virtual device for this view
|
||||
@@ -1234,12 +1233,11 @@ bool Image::isRowSliceValid(size_t rowPitch, size_t slice, size_t width, size_t
|
||||
}
|
||||
|
||||
void Image::copyToBackingStore(void* initFrom) {
|
||||
char* src;
|
||||
char* dst = reinterpret_cast<char*>(getHostMem());
|
||||
size_t cpySize = getWidth() * getImageFormat().getElementSize();
|
||||
|
||||
for (uint z = 0; z < getDepth(); ++z) {
|
||||
src = reinterpret_cast<char*>(initFrom) + z * getSlicePitch();
|
||||
char* src = reinterpret_cast<char*>(initFrom) + z * getSlicePitch();
|
||||
for (uint y = 0; y < getHeight(); ++y) {
|
||||
memcpy(dst, src, cpySize);
|
||||
dst += cpySize;
|
||||
|
||||
Ссылка в новой задаче
Block a user