P4 to Git Change 1501728 by gandryey@gera-w8 on 2018/01/09 15:35:03
SWDEV-79445 - OCL generic changes and code clean-up
- Code style clean-up. No functional changes.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#71 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#70 edit
[ROCm/clr commit: 8e17a1e6b4]
This commit is contained in:
@@ -304,7 +304,6 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
} else {
|
||||
size_t dstSize = size[0];
|
||||
size_t tmpSize = 0;
|
||||
size_t offset = 0;
|
||||
size_t pinSize = dev().settings().pinnedXferSize_;
|
||||
pinSize = std::min(pinSize, dstSize);
|
||||
@@ -320,12 +319,12 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
|
||||
amd::Memory* pinned = NULL;
|
||||
bool first = true;
|
||||
size_t tmpSize;
|
||||
size_t pinAllocSize;
|
||||
|
||||
// Copy memory, using pinning
|
||||
while (dstSize > 0) {
|
||||
// If it's the first iteration, then readjust the copy size
|
||||
size_t tmpSize;
|
||||
// If it's the first iteration, then readjust the copy size
|
||||
// to include alignment
|
||||
if (first) {
|
||||
pinAllocSize = amd::alignUp(pinSize + partial, PinnedMemoryAlignment);
|
||||
|
||||
@@ -1051,7 +1051,7 @@ static void parseRequestedDeviceList(requestedDevices_t& requestedDevices) {
|
||||
int currentDeviceIndex = atoi(pch);
|
||||
// Validate device index.
|
||||
for (size_t i = 0; i < strlen(pch); i++) {
|
||||
if (!isdigit(pch[i])) {
|
||||
if (!isdigit(reinterpret_cast<unsigned char*>(pch)[i])) {
|
||||
deviceIdValid = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -507,7 +507,6 @@ void HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf) {
|
||||
}
|
||||
std::string pfmt = aclPrintf->fmtStr;
|
||||
info.fmtString_.clear();
|
||||
size_t pos = 0;
|
||||
bool need_nl = true;
|
||||
for (size_t pos = 0; pos < pfmt.size(); ++pos) {
|
||||
char symbol = pfmt[pos];
|
||||
|
||||
@@ -512,7 +512,7 @@ uint32_t* PrintfDbg::mapWorkitem(VirtualGPU& gpu, size_t idx, bool* realloc) {
|
||||
}
|
||||
|
||||
// Get a pointer to the workitem data
|
||||
uint32_t* workitem = reinterpret_cast<uint32_t*>(xferBufRead_->map(&gpu));
|
||||
workitem = reinterpret_cast<uint32_t*>(xferBufRead_->map(&gpu));
|
||||
|
||||
return workitem;
|
||||
}
|
||||
|
||||
@@ -1922,13 +1922,13 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < iteration; j++) {
|
||||
for (int iter = 0; iter < iteration; ++iter) {
|
||||
GpuEvent gpuEvent(queues_[MainEngine]->cmdBufId());
|
||||
uint32_t id = gpuEvent.id;
|
||||
// Reset global size for dimension dim if split is needed
|
||||
if (dim != -1) {
|
||||
newOffset[dim] = sizes.offset()[dim] + globalStep * j;
|
||||
if (((newOffset[dim] + globalStep) < sizes.global()[dim]) && (j != (iteration - 1))) {
|
||||
newOffset[dim] = sizes.offset()[dim] + globalStep * iter;
|
||||
if (((newOffset[dim] + globalStep) < sizes.global()[dim]) && (iter != (iteration - 1))) {
|
||||
newGlobalSize[dim] = globalStep;
|
||||
} else {
|
||||
newGlobalSize[dim] = sizes.global()[dim] - newOffset[dim];
|
||||
@@ -2642,7 +2642,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
wait |= state_.forceWait_;
|
||||
// Loop through all outstanding command batches
|
||||
while (!cbQueue_.empty()) {
|
||||
auto cb = cbQueue_.front();
|
||||
cb = cbQueue_.front();
|
||||
// Check if command batch finished without a wait
|
||||
bool finished = true;
|
||||
for (uint i = 0; i < AllEngines; ++i) {
|
||||
@@ -2941,11 +2941,10 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
size_t execInfoOffset = kernelParams.getExecInfoOffset();
|
||||
bool sync = true;
|
||||
|
||||
amd::Memory* memory = nullptr;
|
||||
// get svm non-argument information
|
||||
void* const* svmPtrArray = reinterpret_cast<void* const*>(params + execInfoOffset);
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
memory = amd::SvmManager::FindSvmBuffer(svmPtrArray[i]);
|
||||
amd::Memory* memory = amd::SvmManager::FindSvmBuffer(svmPtrArray[i]);
|
||||
if (nullptr == memory) {
|
||||
if (!supportFineGrainedSystem) {
|
||||
return false;
|
||||
|
||||
Reference in a new issue
Block a user