Files
rocm-systems/rocclr/runtime/device/pal/palthreadtrace.cpp
T
foreman f435a5144a P4 to Git Change 1705130 by gandryey@gera-w8 on 2018/11/09 15:05:26
SWDEV-170771 - [OCL][ PAL issue][Veg10]:Ocltst test fail from OCL Sanity observed by debug driver CL#1621748
	- Make a workaround for assert in PAL. It's unclear if we need the tokens in the trace, so for now use PAL defined mask for all.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#8 edit
2018-11-09 15:20:44 -05:00

167 строки
4.6 KiB
C++

//
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/pal/palthreadtrace.hpp"
#include "device/pal/palvirtual.hpp"
namespace pal {
// Allocates a PalThreadTraceReference for the given virtual GPU and creates the
// PAL performance experiment that backs it.
//
// @param gpu  Virtual GPU (queue) the thread trace reference is created for.
// @return     The new reference object, or nullptr if PAL could not report the
//             experiment size, the allocation failed, or PAL failed to create
//             the experiment.
PalThreadTraceReference* PalThreadTraceReference::Create(VirtualGPU& gpu) {
  // Describe the performance experiment: each option is enabled through its
  // flag bit and given its value in the matching optionValues field.
  Pal::PerfExperimentCreateInfo expInfo = {};
  expInfo.optionFlags.sampleInternalOperations = 1;
  expInfo.optionValues.sampleInternalOperations = true;
  expInfo.optionFlags.cacheFlushOnCounterCollection = 1;
  expInfo.optionValues.cacheFlushOnCounterCollection = true;
  expInfo.optionFlags.sqShaderMask = 1;
  expInfo.optionValues.sqShaderMask = Pal::PerfShaderMaskCs;

  // Ask PAL how much storage the experiment object itself requires
  Pal::Result status;
  const size_t expSize = gpu.dev().iDev()->GetPerfExperimentSize(expInfo, &status);
  if (status != Pal::Result::Success) {
    return nullptr;
  }

  // The PAL size is forwarded to the allocation function.
  // NOTE(review): presumably the class-specific operator new reserves expSize
  // extra bytes right behind the object - confirm in the header.
  PalThreadTraceReference* ref = new (expSize) PalThreadTraceReference(gpu);
  if (ref == nullptr) {
    return nullptr;
  }

  // The PAL experiment is placed at the address directly after the reference object
  status = gpu.dev().iDev()->CreatePerfExperiment(expInfo, &ref[1], &ref->perfExp_);
  if (status != Pal::Result::Success) {
    ref->release();
    return nullptr;
  }

  return ref;
}
// Releases the trace layout copy, the GPU memory object and the PAL
// performance experiment owned by this reference.
PalThreadTraceReference::~PalThreadTraceReference() {
  // The thread trace object is always associated with a particular queue,
  // so we have to lock just this queue
  amd::ScopedLock lock(gpu_.execution());

  // finalize() allocates the layout storage as a raw char array
  // (new char[size]), so it has to be released with the array form of delete
  // on the same element type. A scalar delete through Pal::ThreadTraceLayout*
  // would be a mismatched new[]/delete, which is undefined behaviour.
  delete[] reinterpret_cast<char*>(layout_);
  delete memory_;

  if (nullptr != iPerf()) {
    iPerf()->Destroy();
  }
}
bool PalThreadTraceReference::finalize() {
Pal::Result result;
iPerf()->Finalize();
// Acquire GPU memory for the query from the pool and bind it.
Pal::GpuMemoryRequirements gpuMemReqs = {};
iPerf()->GetGpuMemoryRequirements(&gpuMemReqs);
memory_ = new Memory(gpu().dev(), amd::alignUp(gpuMemReqs.size, gpuMemReqs.alignment));
if (nullptr == memory_) {
return false;
}
if (!memory_->create(Resource::Local)) {
return false;
}
gpu_.queue(gpu_.engineID_).addMemRef(memory_->iMem());
result = iPerf()->BindGpuMemory(memory_->iMem(), memory_->offset());
if (result != Pal::Result::Success) {
return false;
}
Pal::ThreadTraceLayout layout = {};
iPerf()->GetThreadTraceLayout(&layout);
size_t size =
sizeof(Pal::ThreadTraceLayout) + (sizeof(Pal::ThreadTraceSeLayout) * (layout.traceCount - 1));
layout_ = reinterpret_cast<Pal::ThreadTraceLayout*>(new char[size]);
if (layout_ == nullptr) {
return false;
}
layout_->traceCount = layout.traceCount;
iPerf()->GetThreadTraceLayout(layout_);
return true;
}
// Copies the trace data of one thread trace from the experiment's GPU memory
// into a destination buffer.
//
// @param dstMem   Destination memory object; dstMem->size() bytes are copied
//                 to its start.
// @param seIndex  Index into layout_->traces[] selecting the trace to copy.
//
// Nothing is copied (and an error is logged) if the layout or the backing
// memory is missing, dstMem is null, or seIndex is outside the recorded
// trace count.
void PalThreadTraceReference::copyToUserBuffer(Memory* dstMem, uint seIndex) {
  // layout_ and memory_ are only populated by finalize(); without them, or
  // with an index past traceCount, the reads below would be out of bounds.
  if ((dstMem == nullptr) || (layout_ == nullptr) || (memory_ == nullptr) ||
      (seIndex >= layout_->traceCount)) {
    LogError("No finalized thread trace data for the requested trace index");
    return;
  }

  // Source starts at the trace's data offset inside the experiment memory
  amd::Coord3D srcOrigin(layout_->traces[seIndex].dataOffset, 0, 0);
  amd::Coord3D dstOrigin(0, 0, 0);
  amd::Coord3D size(dstMem->size(), 0, 0);

  gpu_.blitMgr().copyBuffer(*memory_, *dstMem, srcOrigin, dstOrigin, size, true);
}
// Drops this object's hold on the thread trace reference, if one is attached.
ThreadTrace::~ThreadTrace() {
  if (palRef_ != nullptr) {
    // Release the thread trace reference object
    palRef_->release();
  }
}
bool ThreadTrace::create() {
palRef_->retain();
size_t se = 0;
for (auto itMemObj = memObj_.begin(); itMemObj != memObj_.end(); ++itMemObj, ++se) {
// Initialize the thread trace
Pal::ThreadTraceInfo sqttInfo = {};
sqttInfo.traceType = Pal::PerfTraceType::ThreadTrace;
sqttInfo.instance = se;
sqttInfo.optionFlags.bufferSize = 1;
// PAL requires ThreadTrace buffer aligned to 4KB
sqttInfo.optionValues.bufferSize =
amd::alignUp(dev().getGpuMemory(*itMemObj)->size(), (0x1 << 12));
sqttInfo.optionFlags.threadTraceTokenConfig = 1;
sqttInfo.optionValues.threadTraceTokenConfig.tokenMask = Pal::ThreadTraceTokenTypeFlags::All;
Pal::Result result = iPerf()->AddThreadTrace(sqttInfo);
if (result != Pal::Result::Success) {
return false;
}
}
return true;
}
void ThreadTrace::populateUserMemory() {
uint se = 0;
for (auto itMemObj = memObj_.begin(); itMemObj != memObj_.end(); ++itMemObj, ++se) {
palRef_->copyToUserBuffer(dev().getGpuMemory(*itMemObj), se);
}
}
// Answers a thread trace query.
//
// @param infoType  Query selector; only CL_THREAD_TRACE_BUFFERS_SIZE is handled.
// @param info      Output array; receives one buffer size per entry of memObj_.
// @param infoSize  Number of elements available in info (it is compared
//                  against numSe_, so it is treated as an element count).
// @return true if the query was answered; false for an unknown infoType, an
//         output array smaller than numSe_, or a missing output array.
bool ThreadTrace::info(uint infoType, uint* info, uint infoSize) const {
  switch (infoType) {
    case CL_THREAD_TRACE_BUFFERS_SIZE: {
      if (infoSize < numSe_) {
        LogError("The amount of buffers should be equal to the amount of Shader Engines");
        return false;
      }

      // There is something to report but nowhere to write it
      if ((info == nullptr) && !memObj_.empty()) {
        LogError("ThreadTrace::getInfo requires a valid output buffer");
        return false;
      }

      uint se = 0;
      for (const auto& memObj : memObj_) {
        // Never write past the capacity the caller provided
        if (se >= infoSize) {
          break;
        }
        info[se] = static_cast<uint>(dev().getGpuMemory(memObj)->size());
        ++se;
      }
      break;
    }
    default:
      LogError("Wrong ThreadTrace::getInfo parameter");
      return false;
  }
  return true;
}
} // namespace pal