From 239faab75e4eb00ef069d41df15be6b52ee90413 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 23 Sep 2014 12:44:50 -0400 Subject: [PATCH] P4 to Git Change 1079952 by yaxunl@yaxunl_stg_win50 on 2014/09/23 12:31:16 ECR #377625 - Workaround for Blender performance issue. Lower available VGPRs to improve waves per CU. Added BuildOptsAppend to OCL app profile. Read BuildOptsAppend and append to build options. Added specific wave optimization option for Blender. Affected files ... ... //depot/stg/opencl/drivers/opencl/appprofiles/oclappprofile.xml#7 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#116 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#170 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#230 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#63 edit [ROCm/clr commit: 16f8ca9aae0342269f21767b9e0a3022643e3ccd] --- .../clr/rocclr/compiler/lib/utils/OPTIONS.def | 8 +++ .../clr/rocclr/runtime/device/appprofile.cpp | 70 +++++++++++++++++++ .../clr/rocclr/runtime/device/appprofile.hpp | 22 +++++- projects/clr/rocclr/runtime/device/device.cpp | 3 + projects/clr/rocclr/runtime/device/device.hpp | 4 ++ .../runtime/device/gpu/gpuappprofile.cpp | 54 -------------- .../runtime/device/gpu/gpuappprofile.hpp | 19 ----- .../clr/rocclr/runtime/platform/program.cpp | 15 +++- 8 files changed, 119 insertions(+), 76 deletions(-) diff --git a/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def b/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def index 385727cec0..83d0fd0bde 100644 --- a/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def +++ b/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def @@ -809,6 +809,14 @@ OPTION(OT_UINT32, \ 100000, 0, 0xFFFFFFFF, NULL, \ "Set kernel size threshold for inliner (default 200000).") +// -wokth=int or --waves-opt-kernel-threshold (default 0) +OPTION(OT_UINT32, \ + OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \ + "wokth", "waves-opt-kernel-threshold", \ + WavesOptKernelThreshold, \ + 0xFFFFFFFF, 0, 0xFFFFFFFF, NULL, \ + "Enable waves optimization when kernel size is greater than this threshold.") + // -fdef-res-id -fno-def-res-id OPTION(OT_BOOL, \ OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \ diff --git a/projects/clr/rocclr/runtime/device/appprofile.cpp b/projects/clr/rocclr/runtime/device/appprofile.cpp index c7e2fc1fcd..53b94ffbbf 100644 --- a/projects/clr/rocclr/runtime/device/appprofile.cpp +++ b/projects/clr/rocclr/runtime/device/appprofile.cpp @@ -6,6 +6,7 @@ #include "os/os.hpp" #include "utils/flags.hpp" #include "appprofile.hpp" +#include static void* __stdcall adlMallocCallback(int n) { @@ -91,6 +92,8 @@ AppProfile::AppProfile(): hsaDeviceHint_(0), profileOverridesAllSettings_(false) { appFileName_ = amd::Os::getAppFileName(); + propertyDataMap_.insert(DataMap::value_type("BuildOptsAppend", + PropertyData(DataType_String, &buildOptsAppend_))); } AppProfile::~AppProfile() @@ -146,4 +149,71 @@ cl_device_type AppProfile::ApplyHsaDeviceHintFlag(const cl_device_type& type) return type; } +bool AppProfile::ParseApplicationProfile() +{ + amd::ADL* adl = new amd::ADL; + + if ((adl == NULL) || !adl->init()) { + delete adl; + return false; + } + + ADLApplicationProfile* pProfile = NULL; + + // Apply blb configurations + int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search( + adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL, + L"OCL", &pProfile); + + delete adl; + + if (pProfile == NULL) { + return false; + } + + PropertyRecord* firstProperty = pProfile->record; + uint32_t valueOffset = 0; + const int BUFSIZE = 1024; + wchar_t wbuffer[BUFSIZE]; + char buffer[2 * BUFSIZE]; + + for (int index = 0; index < pProfile->iCount; index++) { + PropertyRecord* profileProperty = reinterpret_cast + ((reinterpret_cast(firstProperty)) + valueOffset); + + // Get property name + char* propertyName = profileProperty->strName; + auto entry = propertyDataMap_.find(std::string(propertyName)); + if (entry == propertyDataMap_.end()) { + // unexpected name + valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); + continue; + } + + // Get the property value + switch (entry->second.type_) { + case DataType_Boolean: + *(reinterpret_cast(entry->second.data_)) = + profileProperty->uData[0] ? true : false; + break; + case DataType_String: { + assert((size_t)(profileProperty->iDataSize) < sizeof(wbuffer) - 2 && + "app profile string too long"); + memcpy(wbuffer, profileProperty->uData, profileProperty->iDataSize); + wbuffer[profileProperty->iDataSize / 2] = L'\0'; + size_t len = wcstombs(buffer, wbuffer, sizeof(buffer)); + assert(len < sizeof(buffer) - 1 && "app profile string too long"); + *(reinterpret_cast(entry->second.data_)) = buffer; + break; + } + default: + break; + } + valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); + } + + free(pProfile); + return true; +} + } diff --git a/projects/clr/rocclr/runtime/device/appprofile.hpp b/projects/clr/rocclr/runtime/device/appprofile.hpp index a2e70aa682..c2a04367fc 100644 --- a/projects/clr/rocclr/runtime/device/appprofile.hpp +++ b/projects/clr/rocclr/runtime/device/appprofile.hpp @@ -6,6 +6,7 @@ #include "adl.h" +#include #include namespace amd { @@ -55,18 +56,35 @@ public: cl_device_type ApplyHsaDeviceHintFlag(const cl_device_type& type); bool IsHsaInitDisabled() { return noHsaInit_; } - + const std::string& GetBuildOptsAppend() const { return buildOptsAppend_; } protected: + enum DataTypes + { + DataType_Unknown = 0, + DataType_Boolean, + DataType_String, + }; + + struct PropertyData { + PropertyData(DataTypes type, void* data): type_(type), data_(data) {} + DataTypes type_; //!< Data type + void* data_; //!< Pointer to the data + }; + + typedef std::map DataMap; + + DataMap propertyDataMap_; std::string appFileName_; // without extension std::wstring wsAppFileName_; - virtual bool ParseApplicationProfile() { return true; } + virtual bool ParseApplicationProfile(); cl_device_type hsaDeviceHint_; // valid values: CL_HSA_ENABLED_AMD // or CL_HSA_DISABLED_AMD bool gpuvmHighAddr_; // Currently not used. bool noHsaInit_; // Do not even initialize HSA. bool profileOverridesAllSettings_; // Overrides hint flags and env.var. + std::string buildOptsAppend_; }; } diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp index fece9eba28..aa010a50de 100644 --- a/projects/clr/rocclr/runtime/device/device.cpp +++ b/projects/clr/rocclr/runtime/device/device.cpp @@ -51,6 +51,7 @@ namespace amd { std::vector *Device::devices_ = NULL; bool Device::isHsaDeviceAvailable_ = false; bool Device::isGpuDeviceAvailable_ = false; +AppProfile Device::appProfile_; #if defined(WITH_HSA_DEVICE) AppProfile* Device::oclhsaAppProfile_ = NULL; @@ -150,6 +151,8 @@ Device::init() assert(!Runtime::initialized() && "initialize only once"); bool ret = false; devices_ = NULL; + appProfile_.init(); + // IMPORTANT: Note that we are initialiing HSA stack first and then // GPU stack. The order of initialization is signiicant and if changed diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index ba4eee7643..56a3c3606d 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -1675,6 +1675,9 @@ public: //! RTTI internal implementation virtual ObjectType objectType() const {return ObjectTypeDevice;} + //! Returns app profile + static const AppProfile* appProfile() {return &appProfile_;} + protected: //! Enable the specified extension char* getExtensionString(); @@ -1683,6 +1686,7 @@ protected: device::Settings* settings_; //!< Device settings bool online_; //!< The device in online BlitProgram* blitProgram_; //!< Blit program info + static AppProfile appProfile_; //!< application profile private: bool IsHsaCapableDevice(); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp index 0fb9575bd8..50118d91f9 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp @@ -21,59 +21,5 @@ AppProfile::AppProfile() PropertyData(DataType_Boolean, &reportAsOCL12Device_))); } -bool AppProfile::ParseApplicationProfile() -{ - amd::ADL* adl = new amd::ADL; - - if ((adl == NULL) || !adl->init()) { - delete adl; - return false; - } - - ADLApplicationProfile* pProfile = NULL; - - // Apply blb configurations - int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search( - adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL, - L"OCL", &pProfile); - - delete adl; - - if (pProfile == NULL) { - return false; - } - - PropertyRecord* firstProperty = pProfile->record; - uint32_t valueOffset = 0; - - for (int index = 0; index < pProfile->iCount; index++) { - PropertyRecord* profileProperty = reinterpret_cast - ((reinterpret_cast(firstProperty)) + valueOffset); - - // Get property name - char* propertyName = profileProperty->strName; - auto entry = propertyDataMap_.find(std::string(propertyName)); - if (entry == propertyDataMap_.end()) { - // unexpected name - valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); - continue; - } - - // Get the property value - switch (entry->second.type_) { - case DataType_Boolean: - *(reinterpret_cast(entry->second.data_)) = - profileProperty->uData[0] ? true : false; - break; - default: - break; - } - valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); - } - - free(pProfile); - return true; -} - } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp index 1a3ec5d1d6..348d847642 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp @@ -19,26 +19,7 @@ public: bool enableHighPerformanceState() const { return enableHighPerformanceState_; } bool reportAsOCL12Device() const { return reportAsOCL12Device_; } -protected: - //! parse application profile based on application file name - virtual bool ParseApplicationProfile(); - private: - enum DataTypes - { - DataType_Unknown = 0, - DataType_Boolean, - }; - - struct PropertyData { - PropertyData(DataTypes type, void* data): type_(type), data_(data) {} - DataTypes type_; //!< Data type - void* data_; //!< Pointer to the data - }; - - typedef std::map DataMap; - - DataMap propertyDataMap_; bool enableHighPerformanceState_; bool reportAsOCL12Device_; diff --git a/projects/clr/rocclr/runtime/platform/program.cpp b/projects/clr/rocclr/runtime/platform/program.cpp index 52e951511c..fdd969bee6 100644 --- a/projects/clr/rocclr/runtime/platform/program.cpp +++ b/projects/clr/rocclr/runtime/platform/program.cpp @@ -1,7 +1,8 @@ // // Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. // - +#include "top.hpp" +#include "device/appprofile.hpp" #include "platform/program.hpp" #include "platform/context.hpp" #include "utils/options.hpp" @@ -174,6 +175,10 @@ Program::compile( // Override options. cppstr = AMD_OCL_BUILD_OPTIONS; } + if (!Device::appProfile()->GetBuildOptsAppend().empty()) { + cppstr.append(" "); + cppstr.append(Device::appProfile()->GetBuildOptsAppend()); + } if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) { cppstr.append(" "); cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND); @@ -426,6 +431,10 @@ Program::build( // Override options. cppstr = AMD_OCL_BUILD_OPTIONS; } + if (!Device::appProfile()->GetBuildOptsAppend().empty()) { + cppstr.append(" "); + cppstr.append(Device::appProfile()->GetBuildOptsAppend()); + } if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) { cppstr.append(" "); cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND); @@ -543,6 +552,10 @@ Program::buildNoOpt(const Device& device, const std::string& kernelName) // Override options. cppstr = AMD_OCL_BUILD_OPTIONS; } + if (!Device::appProfile()->GetBuildOptsAppend().empty()) { + cppstr.append(" "); + cppstr.append(Device::appProfile()->GetBuildOptsAppend()); + } if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) { cppstr.append(" "); cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);