ファイル
rocm-systems/projects/clr/rocclr/runtime/platform/program.cpp
T
foreman 239faab75e P4 to Git Change 1079952 by yaxunl@yaxunl_stg_win50 on 2014/09/23 12:31:16
ECR #377625 - Workaround for Blender performance issue. Lower available VGPRs to improve waves per CU.

	Added BuildOptsAppend to OCL app profile.
	Read BuildOptsAppend and append to build options.
	Added specific wave optimization option for Blender.

Affected files ...

... //depot/stg/opencl/drivers/opencl/appprofiles/oclappprofile.xml#7 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#170 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#230 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#63 edit


[ROCm/clr commit: 16f8ca9aae]
2014-09-23 12:44:50 -04:00

670 行
20 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "top.hpp"
#include "device/appprofile.hpp"
#include "platform/program.hpp"
#include "platform/context.hpp"
#include "utils/options.hpp"
#include <cstdlib> // for malloc
#include <cstring> // for strcmp
#include <utility>
namespace amd {
//! Releases everything the program object owns: the device programs held in
//! devicePrograms_ and devProgramsNoOpt_, the binary images stored in binary_,
//! and the symbol table.
Program::~Program()
{
    // Device programs registered through addDeviceProgram()
    for (deviceprograms_t::const_iterator prog = devicePrograms_.begin();
         prog != devicePrograms_.end(); ++prog) {
        delete prog->second;
    }

    // Their counterparts kept in the "no optimization" map
    for (deviceprograms_t::const_iterator prog = devProgramsNoOpt_.begin();
         prog != devProgramsNoOpt_.end(); ++prog) {
        delete prog->second;
    }

    // Binary images (array allocations, hence delete [])
    for (devicebinary_t::const_iterator img = binary_.begin();
         img != binary_.end(); ++img) {
        const binary_t& image = img->second;
        if (image.first) {
            delete [] image.first;
        }
    }

    delete symbolTable_;
    //! @todo Make sure we have destroyed all CPU specific objects
}
//! Looks up a kernel symbol by name.
//!
//! \param kernelName  name of the kernel to look for
//! \return pointer to the matching entry of the symbol table, or NULL when
//!         the name is NULL, the symbol table has not been created yet, or
//!         no symbol with that name exists.
const Symbol*
Program::findSymbol(const char* kernelName) const
{
    // The symbol table is only allocated by build()/link(); a lookup made
    // before either of them ran must not dereference a NULL table.
    if ((symbolTable_ == NULL) || (kernelName == NULL)) {
        return NULL;
    }

    symbols_t::const_iterator it = symbolTable_->find(kernelName);
    return (it == symbolTable_->end()) ? NULL : &it->second;
}
//! Creates the device programs (regular and "no optimization") for the root
//! device of \a device and registers them with this program.
//!
//! \param device  device to associate with the program
//! \param image   optional binary image; when non-NULL it is verified by the
//!                device, cloned into binary_ (unless an image is already
//!                stored for the root device) and handed to the new program
//! \param length  size of \a image in bytes
//! \param oclVer  OpenCL C version forwarded to Device::createProgram()
//! \return CL_SUCCESS on success or when the root device already has a
//!         device program; CL_INVALID_BINARY when the image is rejected;
//!         CL_INVALID_VALUE when \a device is already in deviceList_;
//!         CL_OUT_OF_HOST_MEMORY when an allocation fails.
//!
//! Nothing is registered in devicePrograms_ / devProgramsNoOpt_ /
//! deviceList_ unless both device programs were created successfully.
cl_int
Program::addDeviceProgram(Device& device, const void* image, size_t length, int oclVer)
{
    if (image != NULL && !device.verifyBinaryImage(image, length)) {
        return CL_INVALID_BINARY;
    }

    // Check if the device is already associated with this program
    if (deviceList_.find(&device) != deviceList_.end()) {
        return CL_INVALID_VALUE;
    }

    Device& rootDev = device.rootDevice();

    // If the rootDev is already associated with a program there is nothing
    // to do. Use find() rather than operator[] so that the lookup itself
    // does not leave a NULL entry behind in devicePrograms_.
    deviceprograms_t::const_iterator existing = devicePrograms_.find(&rootDev);
    if ((existing != devicePrograms_.end()) && (existing->second != NULL)) {
        return CL_SUCCESS;
    }

    device::Program* program = rootDev.createProgram(oclVer);
    if (program == NULL) {
        return CL_OUT_OF_HOST_MEMORY;
    }

    if (image != NULL) {
        uint8_t* memory = binary(rootDev).first;
        // clone 'binary' (it is owned by the host thread).
        if (memory == NULL) {
            memory = new (std::nothrow) uint8_t[length];
            if (memory == NULL) {
                delete program;
                return CL_OUT_OF_HOST_MEMORY;
            }
            ::memcpy(memory, image, length);
            // Save the original image
            binary_[&rootDev] = std::make_pair(memory, length);
        }

        if (!program->setBinary(reinterpret_cast<char *>(memory), length)) {
            delete program;
            return CL_INVALID_BINARY;
        }
    }

    // Create the second program before registering anything, so that a
    // failure here does not leave a half-registered root device behind.
    device::Program* programNoOpt = rootDev.createProgram(oclVer);
    if (programNoOpt == NULL) {
        delete program;
        return CL_OUT_OF_HOST_MEMORY;
    }

    devicePrograms_[&rootDev] = program;
    devProgramsNoOpt_[&rootDev] = programNoOpt;
    deviceList_.insert(&device);
    return CL_SUCCESS;
}
//! Returns the device program registered for the root device of \a device,
//! or NULL when devicePrograms_ has no entry for that root device.
device::Program*
Program::getDeviceProgram(const Device& device) const
{
    deviceprograms_t::const_iterator entry =
        devicePrograms_.find(&device.rootDevice());
    return (entry != devicePrograms_.end()) ? entry->second : NULL;
}
// Definition of the static Program::buildLock_ monitor. compile(), link(),
// build() and buildNoOpt() each take it through a ScopedLock as their first
// statement; being a static member, it is one lock shared by every Program.
// NOTE(review): the meaning of the second constructor argument (true) is not
// visible in this file -- confirm against the Monitor declaration.
Monitor
Program::buildLock_("OCL build program", true);
//! Maps an OpenCL C version tag to its numeric form.
//!
//! \param clVer  version tag, one of "CL1.0", "CL1.1", "CL1.2", "CL2.0"
//! \return 100, 110, 120 or 200 for the tags above. Any other tag is
//!         reported through LogError and also yields 200.
inline static int
GetOclCVersion(const char* clVer)
{
    static const struct {
        const char* tag;
        int         version;
    } kKnownVersions[] = {
        { "CL1.0", 100 },
        { "CL1.1", 110 },
        { "CL1.2", 120 },
        { "CL2.0", 200 }
    };
    const size_t numKnown = sizeof(kKnownVersions) / sizeof(kKnownVersions[0]);

    for (size_t idx = 0; idx < numKnown; ++idx) {
        if (::strcmp(clVer, kKnownVersions[idx].tag) == 0) {
            return kKnownVersions[idx].version;
        }
    }

    // Unknown tags fall back to the newest version after logging
    {
        LogError("Unsupported OCL C version!");
    }
    return 200;
}
//! Compiles the program source for every device in \a devices.
//!
//! \param devices             devices to compile for
//! \param numHeaders          number of entries read from \a headerPrograms
//! \param headerPrograms      programs whose source code is passed as headers
//! \param headerIncludeNames  include names, forwarded unchanged to the
//!                            device program's compile()
//! \param options             user option string, may be NULL
//! \param notifyFptr          optional callback; invoked only when the
//!                            function runs to its end (the early error
//!                            returns below do not call it)
//! \param data                opaque pointer handed to \a notifyFptr
//! \param optionChangable     when true, AMD_OCL_BUILD_OPTIONS, the app
//!                            profile and AMD_OCL_BUILD_OPTIONS_APPEND may
//!                            replace / extend the option string
//! \return CL_INVALID_COMPILER_OPTIONS when the options fail to parse, the
//!         error of addDeviceProgram() when a device program cannot be
//!         created, CL_INVALID_OPERATION when there is no source code or
//!         when more than one device fails, otherwise the result of the
//!         (single) failing device or CL_SUCCESS.
cl_int
Program::compile(
    const std::vector<Device*>& devices,
    size_t numHeaders,
    const std::vector<const Program*>& headerPrograms,
    const char** headerIncludeNames,
    const char* options,
    void (CL_CALLBACK * notifyFptr)(cl_program, void *),
    void* data,
    bool optionChangable)
{
    ScopedLock sl(buildLock_);
    cl_int retval = CL_SUCCESS;

    // Clear the program object
    clear();

    // Process build options.
    option::Options parsedOptions;
    std::string cppstr(options ? options : "");

    // If there is a -ignore-env, keep only what follows it and ignore the
    // environment / app-profile overrides.
    const size_t ignorePos = cppstr.find("-ignore-env");
    if (ignorePos != std::string::npos) {
        // sizeof counts the terminating NUL, so this also skips the one
        // character that follows the flag. When the flag is the last token
        // that offset lies past the end of the string and substr() would
        // throw std::out_of_range, so clamp to an empty remainder instead.
        const size_t tail = ignorePos + sizeof("-ignore-env");
        cppstr = (tail < cppstr.size()) ? cppstr.substr(tail) : std::string();
        optionChangable = false;
    }

    if (optionChangable) {
        if (AMD_OCL_BUILD_OPTIONS != NULL) {
            // Override options.
            cppstr = AMD_OCL_BUILD_OPTIONS;
        }
        if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
            cppstr.append(" ");
            cppstr.append(Device::appProfile()->GetBuildOptsAppend());
        }
        if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
            cppstr.append(" ");
            cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
        }
    }

    // The options log is published whether or not parsing succeeded
    const bool optionsOk = option::parseAllOptions(cppstr, parsedOptions);
    programLog_ = parsedOptions.optionsLog();
    if (!optionsOk) {
        return CL_INVALID_COMPILER_OPTIONS;
    }

    // Collect the source text of every header program
    std::vector<const std::string*> headers(numHeaders);
    for (size_t i = 0; i < numHeaders; ++i) {
        const std::string& header = headerPrograms[i]->sourceCode();
        headers[i] = &header;
    }

    // Compile the device programs associated with the given devices.
    std::vector<Device*>::const_iterator it;
    for (it = devices.begin(); it != devices.end(); ++it) {
        device::Program* devProgram = getDeviceProgram(**it);
        if (devProgram == NULL) {
            const binary_t& bin = binary(**it);
            const int oclVer = GetOclCVersion(parsedOptions.oVariables->CLStd);
            retval = addDeviceProgram(**it, bin.first, bin.second, oclVer);
            if (retval != CL_SUCCESS) {
                return retval;
            }
            devProgram = getDeviceProgram(**it);
        }

        if (devProgram->type() == device::Program::TYPE_INTERMEDIATE) {
            continue;
        }
        // We only build a Device-Program once
        if (devProgram->buildStatus() != CL_BUILD_NONE) {
            continue;
        }
        if (sourceCode_.empty()) {
            return CL_INVALID_OPERATION;
        }

        // Note: the device program receives the caller's original option
        // string together with the parsed (possibly overridden) options.
        const cl_int result = devProgram->compile(
            sourceCode_, headers,
            headerIncludeNames,
            options,
            &parsedOptions);

        if (result != CL_SUCCESS) {
            // A second failing device collapses to CL_INVALID_OPERATION;
            // the first failure is reported as is.
            retval = (retval != CL_SUCCESS) ? CL_INVALID_OPERATION : result;
        }
    }

    if (notifyFptr != NULL) {
        notifyFptr(as_cl(this), data);
    }

    return retval;
}
//! Links the device programs of \a inputPrograms into this program for
//! every device in \a devices and rebuilds the symbol table.
//!
//! \param devices          devices to link for
//! \param numInputs        number of entries read from \a inputPrograms
//! \param inputPrograms    programs providing the objects to link
//! \param options          user option string, may be NULL
//! \param notifyFptr       optional callback; invoked only when the function
//!                         runs to its end (early error returns skip it)
//! \param data             opaque pointer handed to \a notifyFptr
//! \param optionChangable  when true, AMD_OCL_LINK_OPTIONS and
//!                         AMD_OCL_LINK_OPTIONS_APPEND may replace / extend
//!                         the option string
//! \return CL_OUT_OF_HOST_MEMORY when the symbol table cannot be allocated,
//!         CL_INVALID_LINKER_OPTIONS when the options fail to parse,
//!         CL_INVALID_VALUE when only some of the input programs carry a
//!         device program for a device, the error of addDeviceProgram(),
//!         CL_LINK_PROGRAM_FAILURE when a symbol cannot be registered,
//!         CL_INVALID_OPERATION when more than one device fails, otherwise
//!         the result of the (single) failing device or CL_SUCCESS.
cl_int
Program::link(
    const std::vector<Device*>& devices,
    size_t numInputs,
    const std::vector<Program*>& inputPrograms,
    const char* options,
    void (CL_CALLBACK * notifyFptr)(cl_program, void *),
    void* data,
    bool optionChangable)
{
    ScopedLock sl(buildLock_);
    cl_int retval = CL_SUCCESS;

    if (symbolTable_ == NULL) {
        // nothrow, so that the NULL check below can actually trigger
        symbolTable_ = new (std::nothrow) symbols_t;
        if (symbolTable_ == NULL) {
            return CL_OUT_OF_HOST_MEMORY;
        }
    }

    // Clear the program object
    clear();

    // Process build options.
    option::Options parsedOptions;
    std::string cppstr(options ? options : "");

    // If there is a -ignore-env, keep only what follows it and ignore the
    // environment overrides.
    const size_t ignorePos = cppstr.find("-ignore-env");
    if (ignorePos != std::string::npos) {
        // sizeof counts the terminating NUL, so this also skips the one
        // character that follows the flag. When the flag is the last token
        // that offset lies past the end of the string and substr() would
        // throw std::out_of_range, so clamp to an empty remainder instead.
        const size_t tail = ignorePos + sizeof("-ignore-env");
        cppstr = (tail < cppstr.size()) ? cppstr.substr(tail) : std::string();
        optionChangable = false;
    }

    if (optionChangable) {
        if (AMD_OCL_LINK_OPTIONS != NULL) {
            // Override options.
            cppstr = AMD_OCL_LINK_OPTIONS;
        }
        if (AMD_OCL_LINK_OPTIONS_APPEND != NULL) {
            cppstr.append(" ");
            cppstr.append(AMD_OCL_LINK_OPTIONS_APPEND);
        }
    }

    // The options log is published whether or not parsing succeeded
    const bool optionsOk = option::parseLinkOptions(cppstr, parsedOptions);
    programLog_ = parsedOptions.optionsLog();
    if (!optionsOk) {
        return CL_INVALID_LINKER_OPTIONS;
    }

    // Link the device programs associated with the given devices.
    std::vector<Device*>::const_iterator it;
    for (it = devices.begin(); it != devices.end(); ++it) {
        // Find the corresponding device program in each input program.
        // The vector is pre-sized, so the number of inputs that really
        // provide a device program has to be counted separately.
        std::vector<device::Program*> inputDevPrograms(numInputs);
        size_t numFound = 0;
        int maxOclVer = GetOclCVersion(parsedOptions.oVariables->CLStd);

        for (size_t i = 0; i < numInputs; ++i) {
            Program& inputProgram = *inputPrograms[i];
            // Bind by reference: no need to copy the whole map per input
            const deviceprograms_t& inputDevProgs = inputProgram.devicePrograms();
            deviceprograms_t::const_iterator findIt = inputDevProgs.find(*it);
            if ((findIt == inputDevProgs.end()) || (findIt->second == NULL)) {
                // Once one input had a program, a missing one is an error
                // (detected below); no point in scanning further.
                if (numFound > 0) break;
                continue;
            }
            ++numFound;
            inputDevPrograms[i] = findIt->second;

            // Track the highest -cl-std the inputs were compiled with
            const std::string& inputOptions = inputDevPrograms[i]->compileOptions();
            const size_t pos = inputOptions.find("-cl-std=");
            if (pos != std::string::npos) {
                // 8 == strlen("-cl-std="), 5 == strlen("CLx.y")
                const std::string clStd = inputOptions.substr((pos + 8), 5);
                const int oclVer = GetOclCVersion(clStd.c_str());
                maxOclVer = (maxOclVer > oclVer) ? maxOclVer : oclVer;
            }
        }

        // None of the inputs targets this device: nothing to link for it
        if (numFound == 0) {
            continue;
        }
        // Only some of the inputs target this device: refuse, rather than
        // passing NULL device programs on to the device linker.
        // NOTE(review): the OpenCL specification names CL_INVALID_OPERATION
        // for this case; the pre-existing error code is kept here.
        if (numFound < numInputs) {
            return CL_INVALID_VALUE;
        }

        device::Program* devProgram = getDeviceProgram(**it);
        if (devProgram == NULL) {
            const binary_t& bin = binary(**it);
            retval = addDeviceProgram(**it, bin.first, bin.second, maxOclVer);
            if (retval != CL_SUCCESS) {
                return retval;
            }
            devProgram = getDeviceProgram(**it);
        }

        // We only build a Device-Program once
        if (devProgram->buildStatus() != CL_BUILD_NONE) {
            continue;
        }

        const cl_int result = devProgram->link(
            inputDevPrograms, options, &parsedOptions);

        if (result != CL_SUCCESS) {
            // A second failing device collapses to CL_INVALID_OPERATION;
            // the first failure is reported as is.
            retval = (retval != CL_SUCCESS) ? CL_INVALID_OPERATION : result;
        }
    }

    // Rebuild the symbol table
    deviceprograms_t::iterator sit;
    for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
        const Device& device = *sit->first;
        const device::Program& program = *sit->second;

        const device::Program::kernels_t& kernels = program.kernels();
        device::Program::kernels_t::const_iterator kit;
        for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
            const std::string& name = kit->first;
            const device::Kernel* devKernel = kit->second;

            Symbol& symbol = (*symbolTable_)[name];
            if (!symbol.setDeviceKernel(device, devKernel)) {
                retval = CL_LINK_PROGRAM_FAILURE;
            }
        }
    }

    // Create a string with all kernel names from the program
    if (kernelNames_.length() == 0) {
        amd::Program::symbols_t::const_iterator symIt;
        for (symIt = symbols().begin(); symIt != symbols().end(); ++symIt) {
            if (symIt != symbols().begin()) {
                kernelNames_.append(1, ';');
            }
            kernelNames_.append(symIt->first.c_str());
        }
    }

    if (notifyFptr != NULL) {
        notifyFptr(as_cl(this), data);
    }

    return retval;
}
//! Builds (compiles and links) the program for every device in \a devices
//! and rebuilds the symbol table.
//!
//! \param devices          devices to build for
//! \param options          user option string, may be NULL
//! \param notifyFptr       optional callback; invoked only when the function
//!                         runs to its end (early error returns skip it)
//! \param data             opaque pointer handed to \a notifyFptr
//! \param optionChangable  when true, AMD_OCL_BUILD_OPTIONS, the app profile
//!                         and AMD_OCL_BUILD_OPTIONS_APPEND may replace /
//!                         extend the option string
//! \return CL_OUT_OF_HOST_MEMORY when the symbol table cannot be allocated,
//!         CL_INVALID_BUILD_OPTIONS when the options fail to parse, the
//!         error of addDeviceProgram(), CL_BUILD_PROGRAM_FAILURE when a
//!         symbol cannot be registered, CL_INVALID_OPERATION when more than
//!         one device fails, otherwise the result of the (single) failing
//!         device or CL_SUCCESS.
cl_int
Program::build(
    const std::vector<Device*>& devices,
    const char* options,
    void (CL_CALLBACK * notifyFptr)(cl_program, void *),
    void* data,
    bool optionChangable)
{
    ScopedLock sl(buildLock_);
    cl_int retval = CL_SUCCESS;

    if (symbolTable_ == NULL) {
        // nothrow, so that the NULL check below can actually trigger
        symbolTable_ = new (std::nothrow) symbols_t;
        if (symbolTable_ == NULL) {
            return CL_OUT_OF_HOST_MEMORY;
        }
    }

    // Clear the program object
    clear();

    // Process build options.
    option::Options parsedOptions;
    std::string cppstr(options ? options : "");

    // If there is a -ignore-env, keep only what follows it and ignore the
    // environment / app-profile overrides.
    const size_t ignorePos = cppstr.find("-ignore-env");
    if (ignorePos != std::string::npos) {
        // sizeof counts the terminating NUL, so this also skips the one
        // character that follows the flag. When the flag is the last token
        // that offset lies past the end of the string and substr() would
        // throw std::out_of_range, so clamp to an empty remainder instead.
        const size_t tail = ignorePos + sizeof("-ignore-env");
        cppstr = (tail < cppstr.size()) ? cppstr.substr(tail) : std::string();
        optionChangable = false;
    }

    if (optionChangable) {
        if (AMD_OCL_BUILD_OPTIONS != NULL) {
            // Override options.
            cppstr = AMD_OCL_BUILD_OPTIONS;
        }
        if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
            cppstr.append(" ");
            cppstr.append(Device::appProfile()->GetBuildOptsAppend());
        }
        if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
            cppstr.append(" ");
            cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
        }
    }

    // The options log is published whether or not parsing succeeded
    const bool optionsOk = option::parseAllOptions(cppstr, parsedOptions);
    programLog_ = parsedOptions.optionsLog();
    if (!optionsOk) {
        return CL_INVALID_BUILD_OPTIONS;
    }

    // Build the device programs associated with the given devices.
    std::vector<Device*>::const_iterator it;
    for (it = devices.begin(); it != devices.end(); ++it) {
        device::Program* devProgram = getDeviceProgram(**it);
        if (devProgram == NULL) {
            const binary_t& bin = binary(**it);
            const int oclVer = GetOclCVersion(parsedOptions.oVariables->CLStd);
            if (sourceCode_.empty() && (bin.first == NULL)) {
                // Neither source nor binary for this device: skip it.
                // NOTE(review): 'false' converts to 0, so this resets
                // retval (including an error recorded for an earlier
                // device) instead of reporting a failure. Kept as is;
                // confirm whether an error code was intended.
                retval = false;
                continue;
            }
            retval = addDeviceProgram(**it, bin.first, bin.second, oclVer);
            if (retval != CL_SUCCESS) {
                return retval;
            }
            devProgram = getDeviceProgram(**it);
        }

        // Per-device setting, applied before the build-status check
        parsedOptions.oVariables->AssumeAlias = (*it)->settings().assumeAliases_;

        // We only build a Device-Program once
        if (devProgram->buildStatus() != CL_BUILD_NONE) {
            continue;
        }

        // Note: the device program receives the caller's original option
        // string together with the parsed (possibly overridden) options.
        const cl_int result =
            devProgram->build(sourceCode_, options, &parsedOptions);

        if (result != CL_SUCCESS) {
            // A second failing device collapses to CL_INVALID_OPERATION;
            // the first failure is reported as is.
            retval = (retval != CL_SUCCESS) ? CL_INVALID_OPERATION : result;
        }
    }

    // Rebuild the symbol table
    deviceprograms_t::iterator sit;
    for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
        const Device& device = *sit->first;
        const device::Program& program = *sit->second;

        const device::Program::kernels_t& kernels = program.kernels();
        device::Program::kernels_t::const_iterator kit;
        for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
            const std::string& name = kit->first;
            const device::Kernel* devKernel = kit->second;

            Symbol& symbol = (*symbolTable_)[name];
            if (!symbol.setDeviceKernel(device, devKernel)) {
                retval = CL_BUILD_PROGRAM_FAILURE;
            }
        }
    }

    // Create a string with all kernel names from the program
    if (kernelNames_.length() == 0) {
        amd::Program::symbols_t::const_iterator symIt;
        for (symIt = symbols().begin(); symIt != symbols().end(); ++symIt) {
            if (symIt != symbols().begin()) {
                kernelNames_.append(1, ';');
            }
            kernelNames_.append(symIt->first.c_str());
        }
    }

    if (notifyFptr != NULL) {
        notifyFptr(as_cl(this), data);
    }

    return retval;
}
bool
Program::buildNoOpt(const Device& device, const std::string& kernelName)
{
ScopedLock sl(buildLock_);
// Don't allow multiple builds of program without optimizations
if (!firstBuildNoOpt_) {
return false;
}
firstBuildNoOpt_ = false;
symbols_t::const_iterator it = symbolTable_->find(kernelName);
assert((it != symbolTable_->end()) && "Kernel must be valid at this time");
const Symbol& progSymbol = it->second;
// Check if program already has unoptimized kernel
device::Kernel* devKernel = const_cast<device::Kernel*>
(progSymbol.getDeviceKernel(device, false));
if (devKernel != NULL) {
return true;
}
// Find the original program for build options string
deviceprograms_t::const_iterator pit = devicePrograms_.find(&device);
assert((pit != devicePrograms_.end()) && "Program must be valid at this time");
device::Program* orgProgram = pit->second;
// Process build options.
option::Options parsedOptions;
std::string cppstr(orgProgram->compileOptions());
if (AMD_OCL_BUILD_OPTIONS != NULL) {
// Override options.
cppstr = AMD_OCL_BUILD_OPTIONS;
}
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
cppstr.append(" ");
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
}
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
cppstr.append(" ");
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
}
if (!option::parseAllOptions(cppstr, parsedOptions)) {
return false;
}
parsedOptions.optionsLog();
parsedOptions.oVariables->AssumeAlias = true;
parsedOptions.oVariables->ForceLLVM = true;
// Find the program without optimizaiton
pit = devProgramsNoOpt_.find(&device);
// Update the symbol table
if (pit != devProgramsNoOpt_.end()) {
device::Program& program = *pit->second;
const device::Program::binary_t& progBinary = orgProgram->binary();
if (!program.setBinary(reinterpret_cast<char *>(const_cast<void*>
(progBinary.first)), progBinary.second)) {
return false;
}
// Force recompilation from the binary only
if (CL_SUCCESS != program.build("", orgProgram->compileOptions().c_str(),
&parsedOptions)) {
return false;
}
const device::Program::kernels_t& kernels = program.kernels();
device::Program::kernels_t::const_iterator kit;
for (kit = kernels.begin(); kit != kernels.end(); ++kit) {
const std::string& name = kit->first;
const device::Kernel* devKernel = kit->second;
symbols_t::iterator sit = symbolTable_->find(name);
Symbol& symbol = sit->second;
if (!symbol.setDeviceKernel(device, devKernel, false)) {
return false;
}
}
}
return true;
}
void
Program::clear()
{
deviceprograms_t::iterator sit;
// Destroy old programs if we have any
for (sit = devicePrograms_.begin(); sit != devicePrograms_.end(); ++sit) {
// Destroy device program
delete sit->second;
}
for (sit = devProgramsNoOpt_.begin(); sit != devProgramsNoOpt_.end(); ++sit) {
// Destroy device program
delete sit->second;
}
devicePrograms_.clear();
devProgramsNoOpt_.clear();
deviceList_.clear();
if (symbolTable_) symbolTable_->clear();
kernelNames_.clear();
}
//! Records \a func as the kernel of this symbol for \a device.
//!
//! \param device   device the kernel belongs to
//! \param func     device kernel to record
//! \param noAlias  true stores into deviceKernels_, false into
//!                 devKernelsNoOpt_
//! \return always true
bool
Symbol::setDeviceKernel(
    const Device& device,
    const device::Kernel* func,
    bool noAlias)
{
    // FIXME_lmoriche: check that the signatures are compatible
    // The signature is taken from the first kernel recorded in
    // deviceKernels_, and from every kernel of a CPU-type device.
    const bool noKernelYet = deviceKernels_.empty();
    if (noKernelYet || device.type() == CL_DEVICE_TYPE_CPU) {
        signature_ = func->signature();
    }

    devicekernels_t& table = noAlias ? deviceKernels_ : devKernelsNoOpt_;
    table[&device] = func;
    return true;
}
//! Finds the kernel recorded for \a device.
//!
//! \param device   device to look up
//! \param noAlias  true searches deviceKernels_, false devKernelsNoOpt_
//! \return the kernel stored for exactly this device; failing that, the
//!         kernel of the first table entry whose device reports
//!         isAncestor(&device); NULL when neither exists.
const device::Kernel*
Symbol::getDeviceKernel(const Device& device, bool noAlias) const
{
    const devicekernels_t& table = noAlias ? deviceKernels_ : devKernelsNoOpt_;

    // Exact match first
    devicekernels_t::const_iterator exact = table.find(&device);
    if (exact != table.end()) {
        return exact->second;
    }

    // Otherwise fall back to an entry registered for an ancestor device
    for (devicekernels_t::const_iterator entry = table.begin();
         entry != table.end(); ++entry) {
        if (entry->first->isAncestor(&device)) {
            return entry->second;
        }
    }

    return NULL;
}
} // namespace amd