ファイル
rocm-systems/rocclr/runtime/device/gpu/gpuwavelimiter.cpp
T
foreman 5cb151c778 P4 to Git Change 1143317 by yaxunl@yaxunl_stg_win50 on 2015/04/22 11:23:22
ECR #304775 - Wave limiter: fix bug about some variables not initialized before being used.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#2 edit
2015-04-22 11:31:43 -04:00

191 行
4.8 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/gpu/gpukernel.hpp"
#include "device/gpu/gpuwavelimiter.hpp"
#include "os/os.hpp"
#include "utils/flags.hpp"
namespace gpu {
// Tuning parameters shared by all WaveLimiter instances. Each one is
// (re)assigned from the GPU_WAVE_LIMIT_* flags whenever a WaveLimiter is
// constructed.

// Value waves_ is reset to by clearData()/enable(); the per-wave statistics
// tables hold MaxWave + 1 entries.
uint WaveLimiter::MaxWave;
// Number of callbacks spent in the WARMUP state before switching to ADAPT.
uint WaveLimiter::WarmUpCount;
// Callback budget of one ADAPT phase (GPU_WAVE_LIMIT_ADAPT * MaxWave).
uint WaveLimiter::AdaptCount;
// Length of a RUN phase that follows a completed ADAPT phase
// (GPU_WAVE_LIMIT_RUN * MaxWave).
uint WaveLimiter::RunCount;
// ADAPT is cut short once ratio_ for the current wave count reaches this value.
uint WaveLimiter::AbandonThresh;
// Resets the measurement state so that a new sampling round can begin:
// the four per-wave tables are cleared, the callback counter goes back to
// zero and the current wave count returns to MaxWave.
void WaveLimiter::clearData() {
    // clear() is declared outside this file; presumably it zeroes the
    // elements while keeping the table size, since the tables are indexed
    // right after this call -- TODO confirm.
    clear(ratio_);
    clear(average_);
    clear(sum_);
    clear(counts_);

    countAll_ = 0;
    waves_ = MaxWave;
}
// Decides whether adaptive wave limiting is active for the owning kernel.
//
// Nothing happens when waves_ is already non-zero. Otherwise enable_ is
// computed and, when it ends up true, waves_ starts from MaxWave.
void WaveLimiter::enable() {
    if (waves_ > 0) {
        return;
    }

    auto gpuDev = reinterpret_cast<const Device*>(&owner_->dev());

    // Enable it only for SI+, unless GPU_WAVE_LIMIT_ENABLE is set to 1.
    // NOTE(review): setIfNotDefault is defined elsewhere; presumably it takes
    // the flag value when the flag was overridden and the third argument
    // otherwise -- confirm.
    setIfNotDefault(enable_, GPU_WAVE_LIMIT_ENABLE,
        owner_->workGroupInfo()->limitWave_ && gpuDev->settings().siPlus_);

    if (enable_) {
        waves_ = MaxWave;
    }
}
// Builds the wave limiter for the given kernel.
//
// Reads the GPU_WAVE_LIMIT_* flags into the shared static parameters, sizes
// and clears the per-wave statistics tables, optionally opens a per-kernel
// trace file, and leaves the limiter disabled (enable_ == false) in the
// WARMUP state.
//
// owner: kernel this limiter belongs to; it is dereferenced here, so it must
//        not be null.
WaveLimiter::WaveLimiter(Kernel *owner) :
    // dumper_ is initialised from the constructor parameter rather than from
    // the owner_ member, so the result does not depend on the order in which
    // the two members are declared in the class.
    owner_(owner), dumper_(owner->name()) {
    auto gpuDev = reinterpret_cast<const Device*>(&owner_->dev());
    auto attrib = gpuDev->getAttribs();
    auto hwInfo = gpuDev->hwInfo();

    // SIMDs per shader array: CUs per shader array times SIMDs per CU, unless
    // overridden through GPU_WAVE_LIMIT_CU_PER_SH (setIfNotDefault is defined
    // elsewhere -- presumably the flag wins when it is not at its default).
    setIfNotDefault(SIMDPerSH_, GPU_WAVE_LIMIT_CU_PER_SH,
        attrib.numberOfCUsperShaderArray * hwInfo->simdPerCU_);

    // MaxWave must be assigned first: AdaptCount and RunCount scale with it.
    MaxWave = GPU_WAVE_LIMIT_MAX_WAVE;
    WarmUpCount = GPU_WAVE_LIMIT_WARMUP;
    AdaptCount = GPU_WAVE_LIMIT_ADAPT * MaxWave;
    RunCount = GPU_WAVE_LIMIT_RUN * MaxWave;
    AbandonThresh = GPU_WAVE_LIMIT_ABANDON;

    state_ = WARMUP;
    dynRunCount_ = RunCount;

    // One table slot per wave count in [0, MaxWave].
    auto size = MaxWave + 1;
    counts_.resize(size);
    sum_.resize(size);
    average_.resize(size);
    ratio_.resize(size);
    clearData();

    if (!flagIsDefault(GPU_WAVE_LIMIT_TRACE)) {
        traceStream_.open(std::string(GPU_WAVE_LIMIT_TRACE) + owner_->name() +
            ".txt");
    }

    // clearData() set waves_ to MaxWave; the flag value replaces it here and
    // stays in effect until enable() decides otherwise.
    waves_ = GPU_WAVES_PER_SIMD;
    bestWave_ = MaxWave;
    enable_ = false;
}
// Closes the trace file if the constructor opened one.
WaveLimiter::~WaveLimiter() {
    if (!traceStream_.is_open()) {
        return;
    }
    traceStream_.close();
}
// Returns the current wave count multiplied by SIMDPerSH_.
uint WaveLimiter::getWavesPerSH() const {
    const auto wavesPerSimd = waves_;
    return wavesPerSimd * SIMDPerSH_;
}
// Folds one measured execution time into the statistics of the current wave
// count (waves_), refreshes its average and its ratio against the MaxWave
// baseline, updates bestWave_ and emits a trace record.
//
// time: duration reported for the latest run.
void WaveLimiter::updateData(ulong time) {
    sum_[waves_] += time;
    counts_[waves_]++;
    average_[waves_] = sum_[waves_] / counts_[waves_];

    // The ratio is a percentage of the MaxWave average. That baseline is 0
    // when every MaxWave sample so far reported a zero duration; dividing by
    // it would be a division by zero, so the entry is treated as on par with
    // the baseline (100) until a real baseline exists.
    const auto baseline = average_[MaxWave];
    if (baseline != 0) {
        ratio_[waves_] = average_[waves_] * 100 / baseline;
    } else {
        ratio_[waves_] = 100;
    }

    // Remember the wave count with the lowest average time seen so far.
    if (average_[bestWave_] > average_[waves_]) {
        bestWave_ = waves_;
    }
    outputTrace();
}
// Appends one record to the trace file: a header line with the kernel name,
// state, current and best wave counts, followed by the four statistics
// tables. Does nothing when no trace file is open.
void WaveLimiter::outputTrace() {
    if (traceStream_.is_open()) {
        // Header line.
        traceStream_ << "[WaveLimiter] " << owner_->name();
        traceStream_ << " state=" << state_;
        traceStream_ << " waves=" << waves_;
        traceStream_ << " bestWave=" << bestWave_ << '\n';

        // Per-wave tables, written through the output() helper declared
        // outside this file.
        output(traceStream_, "\n counts = ", counts_);
        output(traceStream_, "\n sum = ", sum_);
        output(traceStream_, "\n average = ", average_);
        output(traceStream_, "\n ratio = ", ratio_);

        traceStream_ << "\n\n";
    }
}
void WaveLimiter::callback(ulong duration) {
dumper_.addData(duration, waves_, static_cast<char>(state_));
if (!enable_) {
return;
}
countAll_++;
switch (state_) {
case WARMUP:
if (countAll_ < WarmUpCount) {
return;
}
state_ = ADAPT;
bestWave_ = MaxWave;
clearData();
return;
case ADAPT:
updateData(duration);
if (countAll_ < AdaptCount && ratio_[waves_] < AbandonThresh) {
waves_ = MaxWave - (countAll_ % MaxWave);
return;
}
waves_ = bestWave_;
if (countAll_ >= AdaptCount) {
dynRunCount_ = RunCount;
} else {
dynRunCount_ = AdaptCount;
}
countAll_ = rand() % MaxWave;
state_ = RUN;
return;
case RUN:
if (countAll_ < dynRunCount_) {
return;
}
state_ = ADAPT;
bestWave_ = MaxWave;
clearData();
return;
}
}
// Dumping is enabled when the GPU_WAVE_LIMIT_DUMP flag is not at its default;
// in that case the output file name is the flag value followed by the kernel
// name and a ".csv" suffix.
WaveLimiter::DataDumper::DataDumper(const std::string &kernelName) {
    enable_ = !flagIsDefault(GPU_WAVE_LIMIT_DUMP);
    if (!enable_) {
        return;
    }
    fileName_ = std::string(GPU_WAVE_LIMIT_DUMP);
    fileName_ += kernelName;
    fileName_ += ".csv";
}
// Writes every collected sample to fileName_ as CSV rows of the form
// "index,time,wavesPerSIMD,state" when dumping is enabled.
WaveLimiter::DataDumper::~DataDumper() {
    if (enable_) {
        std::ofstream csv(fileName_);
        const size_t rows = time_.size();
        for (size_t row = 0; row < rows; ++row) {
            csv << row << ',' << time_[row] << ',';
            // The state is stored as a char; print it as a number.
            csv << wavePerSIMD_[row] << ',' << static_cast<uint>(state_[row])
                << '\n';
        }
        csv.close();
    }
}
// Records one sample (duration, wave count, limiter state) for the CSV dump.
// Nothing is stored while dumping is disabled.
void WaveLimiter::DataDumper::addData(ulong time, uint wave, char state) {
    if (enable_) {
        time_.push_back(time);
        wavePerSIMD_.push_back(wave);
        state_.push_back(state);
    }
}
// Returns this object as a profiling callback when either the limiter or the
// data dumper is enabled, and a null pointer otherwise.
amd::ProfilingCallback* WaveLimiter::getProfilingCallback() const {
    if (!enable_ && !dumper_.enabled()) {
        return nullptr;
    }
    // The interface hands out a mutable pointer from a const accessor.
    return const_cast<WaveLimiter*>(this);
}
}