P4 to Git Change 1079952 by yaxunl@yaxunl_stg_win50 on 2014/09/23 12:31:16
ECR #377625 - Workaround for Blender performance issue. Lower available VGPRs to improve waves per CU.
Added BuildOptsAppend to OCL app profile.
Read BuildOptsAppend and append to build options.
Added specific wave optimization option for Blender.
Affected files ...
... //depot/stg/opencl/drivers/opencl/appprofiles/oclappprofile.xml#7 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#170 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#230 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#63 edit
[ROCm/clr commit: 16f8ca9aae]
This commit is contained in:
@@ -809,6 +809,14 @@ OPTION(OT_UINT32, \
|
||||
100000, 0, 0xFFFFFFFF, NULL, \
|
||||
"Set kernel size threshold for inliner (default 200000).")
|
||||
|
||||
// -wokth=int or --waves-opt-kernel-threshold (default 0xFFFFFFFF)
|
||||
OPTION(OT_UINT32, \
|
||||
OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \
|
||||
"wokth", "waves-opt-kernel-threshold", \
|
||||
WavesOptKernelThreshold, \
|
||||
0xFFFFFFFF, 0, 0xFFFFFFFF, NULL, \
|
||||
"Enable waves optimization when kernel size is greater than this threshold.")
|
||||
|
||||
// -fdef-res-id -fno-def-res-id
|
||||
OPTION(OT_BOOL, \
|
||||
OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "os/os.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "appprofile.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
static void* __stdcall adlMallocCallback(int n)
|
||||
{
|
||||
@@ -91,6 +92,8 @@ AppProfile::AppProfile(): hsaDeviceHint_(0),
|
||||
profileOverridesAllSettings_(false)
|
||||
{
|
||||
appFileName_ = amd::Os::getAppFileName();
|
||||
propertyDataMap_.insert(DataMap::value_type("BuildOptsAppend",
|
||||
PropertyData(DataType_String, &buildOptsAppend_)));
|
||||
}
|
||||
|
||||
AppProfile::~AppProfile()
|
||||
@@ -146,4 +149,71 @@ cl_device_type AppProfile::ApplyHsaDeviceHintFlag(const cl_device_type& type)
|
||||
return type;
|
||||
}
|
||||
|
||||
bool AppProfile::ParseApplicationProfile()
|
||||
{
|
||||
amd::ADL* adl = new amd::ADL;
|
||||
|
||||
if ((adl == NULL) || !adl->init()) {
|
||||
delete adl;
|
||||
return false;
|
||||
}
|
||||
|
||||
ADLApplicationProfile* pProfile = NULL;
|
||||
|
||||
// Apply blb configurations
|
||||
int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
|
||||
adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
|
||||
L"OCL", &pProfile);
|
||||
|
||||
delete adl;
|
||||
|
||||
if (pProfile == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PropertyRecord* firstProperty = pProfile->record;
|
||||
uint32_t valueOffset = 0;
|
||||
const int BUFSIZE = 1024;
|
||||
wchar_t wbuffer[BUFSIZE];
|
||||
char buffer[2 * BUFSIZE];
|
||||
|
||||
for (int index = 0; index < pProfile->iCount; index++) {
|
||||
PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>
|
||||
((reinterpret_cast<char*>(firstProperty)) + valueOffset);
|
||||
|
||||
// Get property name
|
||||
char* propertyName = profileProperty->strName;
|
||||
auto entry = propertyDataMap_.find(std::string(propertyName));
|
||||
if (entry == propertyDataMap_.end()) {
|
||||
// unexpected name
|
||||
valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the property value
|
||||
switch (entry->second.type_) {
|
||||
case DataType_Boolean:
|
||||
*(reinterpret_cast<bool*>(entry->second.data_)) =
|
||||
profileProperty->uData[0] ? true : false;
|
||||
break;
|
||||
case DataType_String: {
|
||||
assert((size_t)(profileProperty->iDataSize) < sizeof(wbuffer) - 2 &&
|
||||
"app profile string too long");
|
||||
memcpy(wbuffer, profileProperty->uData, profileProperty->iDataSize);
|
||||
wbuffer[profileProperty->iDataSize / 2] = L'\0';
|
||||
size_t len = wcstombs(buffer, wbuffer, sizeof(buffer));
|
||||
assert(len < sizeof(buffer) - 1 && "app profile string too long");
|
||||
*(reinterpret_cast<std::string*>(entry->second.data_)) = buffer;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
|
||||
}
|
||||
|
||||
free(pProfile);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include "adl.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace amd {
|
||||
@@ -55,18 +56,35 @@ public:
|
||||
|
||||
cl_device_type ApplyHsaDeviceHintFlag(const cl_device_type& type);
|
||||
bool IsHsaInitDisabled() { return noHsaInit_; }
|
||||
|
||||
const std::string& GetBuildOptsAppend() const { return buildOptsAppend_; }
|
||||
protected:
|
||||
enum DataTypes
|
||||
{
|
||||
DataType_Unknown = 0,
|
||||
DataType_Boolean,
|
||||
DataType_String,
|
||||
};
|
||||
|
||||
struct PropertyData {
|
||||
PropertyData(DataTypes type, void* data): type_(type), data_(data) {}
|
||||
DataTypes type_; //!< Data type
|
||||
void* data_; //!< Pointer to the data
|
||||
};
|
||||
|
||||
typedef std::map<std::string, PropertyData> DataMap;
|
||||
|
||||
DataMap propertyDataMap_;
|
||||
std::string appFileName_; // without extension
|
||||
std::wstring wsAppFileName_;
|
||||
|
||||
virtual bool ParseApplicationProfile() { return true; }
|
||||
virtual bool ParseApplicationProfile();
|
||||
|
||||
cl_device_type hsaDeviceHint_; // valid values: CL_HSA_ENABLED_AMD
|
||||
// or CL_HSA_DISABLED_AMD
|
||||
bool gpuvmHighAddr_; // Currently not used.
|
||||
bool noHsaInit_; // Do not even initialize HSA.
|
||||
bool profileOverridesAllSettings_; // Overrides hint flags and env.var.
|
||||
std::string buildOptsAppend_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ namespace amd {
|
||||
std::vector<Device*> *Device::devices_ = NULL;
|
||||
bool Device::isHsaDeviceAvailable_ = false;
|
||||
bool Device::isGpuDeviceAvailable_ = false;
|
||||
AppProfile Device::appProfile_;
|
||||
|
||||
#if defined(WITH_HSA_DEVICE)
|
||||
AppProfile* Device::oclhsaAppProfile_ = NULL;
|
||||
@@ -150,6 +151,8 @@ Device::init()
|
||||
assert(!Runtime::initialized() && "initialize only once");
|
||||
bool ret = false;
|
||||
devices_ = NULL;
|
||||
appProfile_.init();
|
||||
|
||||
|
||||
// IMPORTANT: Note that we are initializing HSA stack first and then
|
||||
// GPU stack. The order of initialization is significant and if changed
|
||||
|
||||
@@ -1675,6 +1675,9 @@ public:
|
||||
//! RTTI internal implementation
|
||||
virtual ObjectType objectType() const {return ObjectTypeDevice;}
|
||||
|
||||
//! Returns app profile
|
||||
static const AppProfile* appProfile() {return &appProfile_;}
|
||||
|
||||
protected:
|
||||
//! Enable the specified extension
|
||||
char* getExtensionString();
|
||||
@@ -1683,6 +1686,7 @@ protected:
|
||||
device::Settings* settings_; //!< Device settings
|
||||
bool online_; //!< The device in online
|
||||
BlitProgram* blitProgram_; //!< Blit program info
|
||||
static AppProfile appProfile_; //!< application profile
|
||||
|
||||
private:
|
||||
bool IsHsaCapableDevice();
|
||||
|
||||
@@ -21,59 +21,5 @@ AppProfile::AppProfile()
|
||||
PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
|
||||
}
|
||||
|
||||
bool AppProfile::ParseApplicationProfile()
|
||||
{
|
||||
amd::ADL* adl = new amd::ADL;
|
||||
|
||||
if ((adl == NULL) || !adl->init()) {
|
||||
delete adl;
|
||||
return false;
|
||||
}
|
||||
|
||||
ADLApplicationProfile* pProfile = NULL;
|
||||
|
||||
// Apply blb configurations
|
||||
int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
|
||||
adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
|
||||
L"OCL", &pProfile);
|
||||
|
||||
delete adl;
|
||||
|
||||
if (pProfile == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PropertyRecord* firstProperty = pProfile->record;
|
||||
uint32_t valueOffset = 0;
|
||||
|
||||
for (int index = 0; index < pProfile->iCount; index++) {
|
||||
PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>
|
||||
((reinterpret_cast<char*>(firstProperty)) + valueOffset);
|
||||
|
||||
// Get property name
|
||||
char* propertyName = profileProperty->strName;
|
||||
auto entry = propertyDataMap_.find(std::string(propertyName));
|
||||
if (entry == propertyDataMap_.end()) {
|
||||
// unexpected name
|
||||
valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the property value
|
||||
switch (entry->second.type_) {
|
||||
case DataType_Boolean:
|
||||
*(reinterpret_cast<bool*>(entry->second.data_)) =
|
||||
profileProperty->uData[0] ? true : false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
|
||||
}
|
||||
|
||||
free(pProfile);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -19,26 +19,7 @@ public:
|
||||
bool enableHighPerformanceState() const { return enableHighPerformanceState_; }
|
||||
bool reportAsOCL12Device() const { return reportAsOCL12Device_; }
|
||||
|
||||
protected:
|
||||
//! parse application profile based on application file name
|
||||
virtual bool ParseApplicationProfile();
|
||||
|
||||
private:
|
||||
enum DataTypes
|
||||
{
|
||||
DataType_Unknown = 0,
|
||||
DataType_Boolean,
|
||||
};
|
||||
|
||||
struct PropertyData {
|
||||
PropertyData(DataTypes type, void* data): type_(type), data_(data) {}
|
||||
DataTypes type_; //!< Data type
|
||||
void* data_; //!< Pointer to the data
|
||||
};
|
||||
|
||||
typedef std::map<std::string, PropertyData> DataMap;
|
||||
|
||||
DataMap propertyDataMap_;
|
||||
|
||||
bool enableHighPerformanceState_;
|
||||
bool reportAsOCL12Device_;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#include "top.hpp"
|
||||
#include "device/appprofile.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/context.hpp"
|
||||
#include "utils/options.hpp"
|
||||
@@ -174,6 +175,10 @@ Program::compile(
|
||||
// Override options.
|
||||
cppstr = AMD_OCL_BUILD_OPTIONS;
|
||||
}
|
||||
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
|
||||
cppstr.append(" ");
|
||||
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
|
||||
}
|
||||
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
|
||||
cppstr.append(" ");
|
||||
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
|
||||
@@ -426,6 +431,10 @@ Program::build(
|
||||
// Override options.
|
||||
cppstr = AMD_OCL_BUILD_OPTIONS;
|
||||
}
|
||||
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
|
||||
cppstr.append(" ");
|
||||
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
|
||||
}
|
||||
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
|
||||
cppstr.append(" ");
|
||||
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
|
||||
@@ -543,6 +552,10 @@ Program::buildNoOpt(const Device& device, const std::string& kernelName)
|
||||
// Override options.
|
||||
cppstr = AMD_OCL_BUILD_OPTIONS;
|
||||
}
|
||||
if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
|
||||
cppstr.append(" ");
|
||||
cppstr.append(Device::appProfile()->GetBuildOptsAppend());
|
||||
}
|
||||
if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
|
||||
cppstr.append(" ");
|
||||
cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
|
||||
|
||||
Reference in New Issue
Block a user