f435a5144a
SWDEV-170771 - [OCL][ PAL issue][Veg10]:Ocltst test fail from OCL Sanity observed by debug driver CL#1621748 - Make a workaround for assert in PAL. It's unclear if we need the tokens in the trace, so for now use PAL defined mask for all. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#8 edit
167 γραμμές
4.6 KiB
C++
167 γραμμές
4.6 KiB
C++
//
|
|
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "device/pal/palthreadtrace.hpp"
|
|
#include "device/pal/palvirtual.hpp"
|
|
|
|
namespace pal {
|
|
|
|
// Factory: builds a PalThreadTraceReference for the given virtual GPU and
// creates the PAL performance experiment that backs it.
//
// Returns the new reference object, or nullptr if PAL fails to report the
// experiment size, the allocation yields no object, or PAL fails to create
// the experiment.
PalThreadTraceReference* PalThreadTraceReference::Create(VirtualGPU& gpu) {
  // Describe the performance experiment. Every option is switched on through
  // its flag and then given its value.
  Pal::PerfExperimentCreateInfo experimentInfo = {};

  experimentInfo.optionFlags.sampleInternalOperations = 1;
  experimentInfo.optionValues.sampleInternalOperations = true;

  experimentInfo.optionFlags.cacheFlushOnCounterCollection = 1;
  experimentInfo.optionValues.cacheFlushOnCounterCollection = true;

  experimentInfo.optionFlags.sqShaderMask = 1;
  experimentInfo.optionValues.sqShaderMask = Pal::PerfShaderMaskCs;

  // Ask PAL how much storage the experiment object needs.
  Pal::Result status;
  size_t experimentSize = gpu.dev().iDev()->GetPerfExperimentSize(experimentInfo, &status);
  if (status != Pal::Result::Success) {
    return nullptr;
  }

  // The size is forwarded to the class' operator new; the storage directly
  // behind the reference object (traceRef + 1) is then handed to PAL as the
  // placement address for the experiment.
  // NOTE(review): presumably operator new reserves experimentSize extra bytes
  // after the object - confirm in the header.
  PalThreadTraceReference* traceRef = new (experimentSize) PalThreadTraceReference(gpu);
  if (traceRef == nullptr) {
    return nullptr;
  }

  status = gpu.dev().iDev()->CreatePerfExperiment(experimentInfo, traceRef + 1,
                                                  &traceRef->perfExp_);
  if (status != Pal::Result::Success) {
    traceRef->release();
    return nullptr;
  }

  return traceRef;
}
|
|
|
|
// Releases everything owned by the reference object: the cached thread trace
// layout, the GPU memory object and the PAL performance experiment.
PalThreadTraceReference::~PalThreadTraceReference() {
  // The thread trace object is always associated with a particular queue,
  // so we have to lock just this queue
  amd::ScopedLock lock(gpu_.execution());

  // finalize() allocates the layout as a raw `new char[size]` buffer and only
  // reinterprets it as Pal::ThreadTraceLayout. It therefore has to be freed
  // with array delete on a char pointer; a scalar `delete layout_` would be a
  // mismatched new[]/delete. Deleting a null pointer is a no-op.
  delete[] reinterpret_cast<char*>(layout_);
  delete memory_;

  if (nullptr != iPerf()) {
    iPerf()->Destroy();
  }
}
|
|
|
|
bool PalThreadTraceReference::finalize() {
|
|
Pal::Result result;
|
|
|
|
iPerf()->Finalize();
|
|
|
|
// Acquire GPU memory for the query from the pool and bind it.
|
|
Pal::GpuMemoryRequirements gpuMemReqs = {};
|
|
iPerf()->GetGpuMemoryRequirements(&gpuMemReqs);
|
|
memory_ = new Memory(gpu().dev(), amd::alignUp(gpuMemReqs.size, gpuMemReqs.alignment));
|
|
|
|
if (nullptr == memory_) {
|
|
return false;
|
|
}
|
|
|
|
if (!memory_->create(Resource::Local)) {
|
|
return false;
|
|
}
|
|
|
|
gpu_.queue(gpu_.engineID_).addMemRef(memory_->iMem());
|
|
|
|
result = iPerf()->BindGpuMemory(memory_->iMem(), memory_->offset());
|
|
|
|
if (result != Pal::Result::Success) {
|
|
return false;
|
|
}
|
|
|
|
Pal::ThreadTraceLayout layout = {};
|
|
iPerf()->GetThreadTraceLayout(&layout);
|
|
|
|
size_t size =
|
|
sizeof(Pal::ThreadTraceLayout) + (sizeof(Pal::ThreadTraceSeLayout) * (layout.traceCount - 1));
|
|
layout_ = reinterpret_cast<Pal::ThreadTraceLayout*>(new char[size]);
|
|
if (layout_ == nullptr) {
|
|
return false;
|
|
}
|
|
|
|
layout_->traceCount = layout.traceCount;
|
|
iPerf()->GetThreadTraceLayout(layout_);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Copies the data of one thread trace from the experiment's GPU memory into
// the start of dstMem using the queue's blit manager.
//
// dstMem  - destination buffer; its full size determines how much is copied.
// seIndex - index into layout_->traces selecting which trace to copy.
//           NOTE(review): no range check against layout_->traceCount here;
//           callers are presumably expected to pass a valid index.
void PalThreadTraceReference::copyToUserBuffer(Memory* dstMem, uint seIndex) {
  // The destination is written from offset 0 for dstMem->size() bytes.
  amd::Coord3D userStart(0, 0, 0);
  amd::Coord3D extent(dstMem->size(), 0, 0);

  // The source offset comes from the layout entry of the requested trace.
  amd::Coord3D traceStart(layout_->traces[seIndex].dataOffset, 0, 0);

  gpu_.blitMgr().copyBuffer(*memory_, *dstMem, traceStart, userStart, extent, true);
}
|
|
|
|
// Releases the thread trace reference object, if one is attached.
ThreadTrace::~ThreadTrace() {
  if (palRef_ != nullptr) {
    palRef_->release();
  }
}
|
|
|
|
bool ThreadTrace::create() {
|
|
palRef_->retain();
|
|
|
|
size_t se = 0;
|
|
for (auto itMemObj = memObj_.begin(); itMemObj != memObj_.end(); ++itMemObj, ++se) {
|
|
// Initialize the thread trace
|
|
Pal::ThreadTraceInfo sqttInfo = {};
|
|
sqttInfo.traceType = Pal::PerfTraceType::ThreadTrace;
|
|
sqttInfo.instance = se;
|
|
|
|
sqttInfo.optionFlags.bufferSize = 1;
|
|
// PAL requires ThreadTrace buffer aligned to 4KB
|
|
sqttInfo.optionValues.bufferSize =
|
|
amd::alignUp(dev().getGpuMemory(*itMemObj)->size(), (0x1 << 12));
|
|
sqttInfo.optionFlags.threadTraceTokenConfig = 1;
|
|
sqttInfo.optionValues.threadTraceTokenConfig.tokenMask = Pal::ThreadTraceTokenTypeFlags::All;
|
|
|
|
Pal::Result result = iPerf()->AddThreadTrace(sqttInfo);
|
|
if (result != Pal::Result::Success) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void ThreadTrace::populateUserMemory() {
|
|
uint se = 0;
|
|
for (auto itMemObj = memObj_.begin(); itMemObj != memObj_.end(); ++itMemObj, ++se) {
|
|
palRef_->copyToUserBuffer(dev().getGpuMemory(*itMemObj), se);
|
|
}
|
|
}
|
|
|
|
// Answers an information query about the thread trace.
//
// infoType - query selector; only CL_THREAD_TRACE_BUFFERS_SIZE is handled.
// info     - output array; receives one buffer size per entry of memObj_.
// infoSize - compared against numSe_; the query is rejected when it is smaller.
//
// Returns true if the query was answered, false (after logging an error) for
// an unknown infoType or a too small infoSize.
bool ThreadTrace::info(uint infoType, uint* info, uint infoSize) const {
  if (infoType != CL_THREAD_TRACE_BUFFERS_SIZE) {
    LogError("Wrong ThreadTrace::getInfo parameter");
    return false;
  }

  if (infoSize < numSe_) {
    LogError("The amount of buffers should be equal to the amount of Shader Engines");
    return false;
  }

  // Report the size of the GPU memory behind each memory object, in
  // container order.
  uint slot = 0;
  for (auto&& memObj : memObj_) {
    info[slot] = dev().getGpuMemory(memObj)->size();
    ++slot;
  }

  return true;
}
|
|
|
|
} // namespace pal
|