P4 to Git Change 1079952 by yaxunl@yaxunl_stg_win50 on 2014/09/23 12:31:16

ECR #377625 - Workaround for Blender performance issue. Lower available VGPRs to improve waves per CU.

	Added BuildOptsAppend to OCL app profile.
	Read BuildOptsAppend and append to build options.
	Added specific wave optimization option for Blender.

Affected files ...

... //depot/stg/opencl/drivers/opencl/appprofiles/oclappprofile.xml#7 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#170 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#230 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#63 edit


[ROCm/clr commit: 16f8ca9aae]
This commit is contained in:
foreman
2014-09-23 12:44:50 -04:00
parent e023435e44
commit 239faab75e
8 changed files with 119 additions and 76 deletions
@@ -809,6 +809,14 @@ OPTION(OT_UINT32, \
100000, 0, 0xFFFFFFFF, NULL, \
"Set kernel size threshold for inliner (default 200000).")
// -wokth=int or --waves-opt-kernel-threshold
// NOTE(review): the first numeric argument below is 0xFFFFFFFF, so the default
// appears to be 0xFFFFFFFF rather than 0 - confirm against the OPTION macro's
// parameter order.
OPTION(OT_UINT32, \
OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \
"wokth", "waves-opt-kernel-threshold", \
WavesOptKernelThreshold, \
0xFFFFFFFF, 0, 0xFFFFFFFF, NULL, \
"Enable waves optimization when kernel size is greater than this threshold.")
// -fdef-res-id -fno-def-res-id
OPTION(OT_BOOL, \
OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \
@@ -6,6 +6,7 @@
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "appprofile.hpp"
#include <cstdlib>
static void* __stdcall adlMallocCallback(int n)
{
@@ -91,6 +92,8 @@ AppProfile::AppProfile(): hsaDeviceHint_(0),
profileOverridesAllSettings_(false)
{
appFileName_ = amd::Os::getAppFileName();
propertyDataMap_.insert(DataMap::value_type("BuildOptsAppend",
PropertyData(DataType_String, &buildOptsAppend_)));
}
AppProfile::~AppProfile()
@@ -146,4 +149,71 @@ cl_device_type AppProfile::ApplyHsaDeviceHintFlag(const cl_device_type& type)
return type;
}
//! Queries ADL for the "OCL" application profile of wsAppFileName_ and copies
//! every property that is registered in propertyDataMap_ into the variable
//! that map entry points at.
//!
//! Boolean properties are read from the first data byte. String properties are
//! copied as wide characters and converted to a multibyte string with
//! wcstombs(); a string that does not fit the local conversion buffers (or
//! that cannot be converted) is skipped and the bound variable is left as is.
//!
//! \return false when ADL cannot be initialized or the search yields no
//!         profile; true after the returned property records were walked.
bool AppProfile::ParseApplicationProfile()
{
    amd::ADL* adl = new amd::ADL;
    if ((adl == NULL) || !adl->init()) {
        delete adl;
        return false;
    }

    ADLApplicationProfile* pProfile = NULL;

    // Apply blb configurations
    // The status code of the search is not inspected; a NULL pProfile below is
    // what tells us that nothing was found.
    adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
        adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
        L"OCL", &pProfile);

    delete adl;

    if (pProfile == NULL) {
        return false;
    }

    PropertyRecord* firstProperty = pProfile->record;
    uint32_t valueOffset = 0;
    const int BUFSIZE = 1024;
    wchar_t wbuffer[BUFSIZE];
    char buffer[2 * BUFSIZE];

    for (int index = 0; index < pProfile->iCount; index++) {
        PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>
            ((reinterpret_cast<char*>(firstProperty)) + valueOffset);

        // Records are variable-sized. Step to the next one up front so that
        // every exit from this iteration (including the skips below) advances.
        valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);

        // Get property name
        char* propertyName = profileProperty->strName;
        auto entry = propertyDataMap_.find(std::string(propertyName));
        if (entry == propertyDataMap_.end()) {
            // unexpected name
            continue;
        }

        // Get the property value
        switch (entry->second.type_) {
        case DataType_Boolean:
            *(reinterpret_cast<bool*>(entry->second.data_)) =
                profileProperty->uData[0] ? true : false;
            break;
        case DataType_String: {
            // A negative iDataSize becomes a huge size_t and is rejected too.
            const size_t dataSize =
                static_cast<size_t>(profileProperty->iDataSize);
            const bool fitsWide = dataSize < sizeof(wbuffer) - 2;
            assert(fitsWide && "app profile string too long");
            if (!fitsWide) {
                // assert() vanishes in release builds; never overrun wbuffer.
                break;
            }
            memcpy(wbuffer, profileProperty->uData, dataSize);
            // Terminate right after the copied characters, whatever the
            // platform's wchar_t width is.
            wbuffer[dataSize / sizeof(wchar_t)] = L'\0';

            // wcstombs() returns (size_t)-1 on an invalid character and does
            // not write a terminator when the output fills the whole buffer;
            // both cases fail this bound and must not reach the std::string
            // assignment, which needs a NUL-terminated buffer.
            const size_t len = wcstombs(buffer, wbuffer, sizeof(buffer));
            const bool converted = len < sizeof(buffer) - 1;
            assert(converted && "app profile string too long");
            if (!converted) {
                break;
            }
            *(reinterpret_cast<std::string*>(entry->second.data_)) = buffer;
            break;
        }
        default:
            break;
        }
    }

    free(pProfile);
    return true;
}
}
@@ -6,6 +6,7 @@
#include "adl.h"
#include <map>
#include <string>
namespace amd {
@@ -55,18 +56,35 @@ public:
//! Takes a device type and returns a device type (defined out of line).
cl_device_type ApplyHsaDeviceHintFlag(const cl_device_type& type);
//! Returns the value of noHsaInit_.
bool IsHsaInitDisabled() { return noHsaInit_; }
//! Returns buildOptsAppend_ by const reference; the "BuildOptsAppend" profile
//! property is bound to that member.
const std::string& GetBuildOptsAppend() const { return buildOptsAppend_; }
protected:
//! Kind of value a profile property is stored as.
enum DataTypes {
    DataType_Unknown = 0,   //!< No known type
    DataType_Boolean = 1,   //!< Bound variable is a bool
    DataType_String  = 2    //!< Bound variable is a std::string
};
//! Pairs a property's data type with the address of the variable it fills.
struct PropertyData {
    DataTypes   type_;  //!< Data type
    void*       data_;  //!< Pointer to the data

    PropertyData(DataTypes type, void* data)
        : type_(type)
        , data_(data)
    {
    }
};
typedef std::map<std::string, PropertyData> DataMap;
DataMap propertyDataMap_;
std::string appFileName_; // without extension
std::wstring wsAppFileName_;
virtual bool ParseApplicationProfile() { return true; }
virtual bool ParseApplicationProfile();
cl_device_type hsaDeviceHint_; // valid values: CL_HSA_ENABLED_AMD
// or CL_HSA_DISABLED_AMD
bool gpuvmHighAddr_; // Currently not used.
bool noHsaInit_; // Do not even initialize HSA.
bool profileOverridesAllSettings_; // Overrides hint flags and env.var.
std::string buildOptsAppend_;
};
}
@@ -51,6 +51,7 @@ namespace amd {
std::vector<Device*> *Device::devices_ = NULL;
bool Device::isHsaDeviceAvailable_ = false;
bool Device::isGpuDeviceAvailable_ = false;
AppProfile Device::appProfile_;
#if defined(WITH_HSA_DEVICE)
AppProfile* Device::oclhsaAppProfile_ = NULL;
@@ -150,6 +151,8 @@ Device::init()
assert(!Runtime::initialized() && "initialize only once");
bool ret = false;
devices_ = NULL;
appProfile_.init();
// IMPORTANT: Note that we are initializing HSA stack first and then
// GPU stack. The order of initialization is significant and if changed
@@ -1675,6 +1675,9 @@ public:
//! RTTI internal implementation
virtual ObjectType objectType() const {return ObjectTypeDevice;}
//! Returns app profile
static const AppProfile* appProfile() {return &appProfile_;}
protected:
//! Enable the specified extension
char* getExtensionString();
@@ -1683,6 +1686,7 @@ protected:
device::Settings* settings_; //!< Device settings
bool online_; //!< The device in online
BlitProgram* blitProgram_; //!< Blit program info
static AppProfile appProfile_; //!< application profile
private:
bool IsHsaCapableDevice();
@@ -21,59 +21,5 @@ AppProfile::AppProfile()
PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
}
bool AppProfile::ParseApplicationProfile()
{
amd::ADL* adl = new amd::ADL;
if ((adl == NULL) || !adl->init()) {
delete adl;
return false;
}
ADLApplicationProfile* pProfile = NULL;
// Apply blb configurations
int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
L"OCL", &pProfile);
delete adl;
if (pProfile == NULL) {
return false;
}
PropertyRecord* firstProperty = pProfile->record;
uint32_t valueOffset = 0;
for (int index = 0; index < pProfile->iCount; index++) {
PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>
((reinterpret_cast<char*>(firstProperty)) + valueOffset);
// Get property name
char* propertyName = profileProperty->strName;
auto entry = propertyDataMap_.find(std::string(propertyName));
if (entry == propertyDataMap_.end()) {
// unexpected name
valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
continue;
}
// Get the property value
switch (entry->second.type_) {
case DataType_Boolean:
*(reinterpret_cast<bool*>(entry->second.data_)) =
profileProperty->uData[0] ? true : false;
break;
default:
break;
}
valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
}
free(pProfile);
return true;
}
}
@@ -19,26 +19,7 @@ public:
//! Returns the value of enableHighPerformanceState_.
bool enableHighPerformanceState() const { return enableHighPerformanceState_; }
//! Returns the value of reportAsOCL12Device_.
bool reportAsOCL12Device() const { return reportAsOCL12Device_; }
protected:
//! parse application profile based on application file name
virtual bool ParseApplicationProfile();
private:
//! Kind of value a profile property is stored as.
enum DataTypes {
    DataType_Unknown = 0,   //!< No known type
    DataType_Boolean = 1    //!< Bound variable is a bool
};
//! Pairs a property's data type with the address of the variable it fills.
struct PropertyData {
    DataTypes   type_;  //!< Data type
    void*       data_;  //!< Pointer to the data

    PropertyData(DataTypes type, void* data)
        : type_(type)
        , data_(data)
    {
    }
};
typedef std::map<std::string, PropertyData> DataMap;
DataMap propertyDataMap_;
bool enableHighPerformanceState_;
bool reportAsOCL12Device_;
@@ -1,7 +1,8 @@
//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "top.hpp"
#include "device/appprofile.hpp"
#include "platform/program.hpp"
#include "platform/context.hpp"
#include "utils/options.hpp"
@@ -174,6 +175,10 @@ Program::compile(
// Override options.
cppstr = AMD_OCL_BUILD_OPTIONS;
}
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
cppstr.append(" ");
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
}
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
cppstr.append(" ");
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
@@ -426,6 +431,10 @@ Program::build(
// Override options.
cppstr = AMD_OCL_BUILD_OPTIONS;
}
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
cppstr.append(" ");
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
}
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
cppstr.append(" ");
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
@@ -543,6 +552,10 @@ Program::buildNoOpt(const Device& device, const std::string& kernelName)
// Override options.
cppstr = AMD_OCL_BUILD_OPTIONS;
}
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
cppstr.append(" ");
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
}
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
cppstr.append(" ");
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);