Add the "BuildOptsAppend" application-profile property and the -wokth option.

ParseApplicationProfile() moves from gpuappprofile.cpp into the base
amd::AppProfile (appprofile.cpp) and learns a string property type. The string
bound to "BuildOptsAppend" is appended to the build options in
Program::compile, Program::build and Program::buildNoOpt, just before
AMD_OCL_BUILD_OPTIONS_APPEND.

Changes made in review (added lines only):
  * OPTIONS.def: the comment above the new OPTION entry said "default 0",
    while the entry passes 0xFFFFFFFF in what appears to be the default-value
    position (value, min, max) -- confirm against the OPTION macro.
  * appprofile.cpp: string payloads were guarded only by assert(), which is
    compiled out under NDEBUG, and the terminator index assumed a 2-byte
    wchar_t. The sizes are now checked at run time, the index is computed
    with sizeof(wchar_t), and a failed wcstombs() no longer reaches the
    std::string assignment.
  * appprofile.cpp: <cassert>, <cstdlib> and <cstring> are included for
    assert, wcstombs/free and memcpy.

NOTE(review): line breaks, indentation, blank context lines, system header
names and template arguments were reconstructed; confirm them against the
tree before applying. The index hashes are those of the original patch.

diff --git a/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def b/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def
index 385727cec0..83d0fd0bde 100644
--- a/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def
+++ b/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def
@@ -809,6 +809,14 @@ OPTION(OT_UINT32, \
        100000, 0, 0xFFFFFFFF, NULL, \
        "Set kernel size threshold for inliner (default 200000).")
 
+// -wokth=int or --waves-opt-kernel-threshold (default 0xFFFFFFFF)
+OPTION(OT_UINT32, \
+       OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \
+       "wokth", "waves-opt-kernel-threshold", \
+       WavesOptKernelThreshold, \
+       0xFFFFFFFF, 0, 0xFFFFFFFF, NULL, \
+       "Enable waves optimization when kernel size is greater than this threshold.")
+
 // -fdef-res-id -fno-def-res-id
 OPTION(OT_BOOL, \
        OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \
diff --git a/projects/clr/rocclr/runtime/device/appprofile.cpp b/projects/clr/rocclr/runtime/device/appprofile.cpp
index c7e2fc1fcd..53b94ffbbf 100644
--- a/projects/clr/rocclr/runtime/device/appprofile.cpp
+++ b/projects/clr/rocclr/runtime/device/appprofile.cpp
@@ -6,6 +6,9 @@
 #include "os/os.hpp"
 #include "utils/flags.hpp"
 #include "appprofile.hpp"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
 
 static void* __stdcall adlMallocCallback(int n)
 {
@@ -91,6 +94,8 @@ AppProfile::AppProfile(): hsaDeviceHint_(0),
     profileOverridesAllSettings_(false)
 {
     appFileName_ = amd::Os::getAppFileName();
+    propertyDataMap_.insert(DataMap::value_type("BuildOptsAppend",
+        PropertyData(DataType_String, &buildOptsAppend_)));
 }
 
 AppProfile::~AppProfile()
@@ -146,4 +151,91 @@ cl_device_type AppProfile::ApplyHsaDeviceHintFlag(const cl_device_type& type)
     return type;
 }
 
+//! Search the ADL application profiles for wsAppFileName_ and copy every
+//! property whose name is registered in propertyDataMap_ into the member it
+//! is bound to; properties with other names are skipped.
+//! Returns false if ADL fails to initialize or the search yields no profile.
+bool AppProfile::ParseApplicationProfile()
+{
+    amd::ADL* adl = new amd::ADL;
+
+    if ((adl == NULL) || !adl->init()) {
+        delete adl;
+        return false;
+    }
+
+    ADLApplicationProfile* pProfile = NULL;
+
+    // Apply blb configurations
+    int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
+        adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
+        L"OCL", &pProfile);
+
+    delete adl;
+
+    if (pProfile == NULL) {
+        return false;
+    }
+
+    PropertyRecord* firstProperty = pProfile->record;
+    uint32_t valueOffset = 0;
+    const int BUFSIZE = 1024;
+    wchar_t wbuffer[BUFSIZE];
+    char buffer[2 * BUFSIZE];
+
+    for (int index = 0; index < pProfile->iCount; index++) {
+        PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>
+            ((reinterpret_cast<char*>(firstProperty)) + valueOffset);
+
+        // Get property name
+        char* propertyName = profileProperty->strName;
+        auto entry = propertyDataMap_.find(std::string(propertyName));
+        if (entry == propertyDataMap_.end()) {
+            // unexpected name
+            valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
+            continue;
+        }
+
+        // Get the property value
+        switch (entry->second.type_) {
+        case DataType_Boolean:
+            *(reinterpret_cast<bool*>(entry->second.data_)) =
+                profileProperty->uData[0] ? true : false;
+            break;
+        case DataType_String: {
+            // assert() is compiled out when NDEBUG is defined, so the size of
+            // the payload is also checked at run time before it is copied into
+            // the fixed-size stack buffers. A rejected string leaves the bound
+            // member unchanged. A negative size converts to a huge value and
+            // is rejected by the same test.
+            const size_t dataSize =
+                static_cast<size_t>(profileProperty->iDataSize);
+            if (dataSize >= sizeof(wbuffer) - sizeof(wchar_t)) {
+                assert(false && "app profile string too long");
+                break;
+            }
+            memcpy(wbuffer, profileProperty->uData, dataSize);
+            // Index in wide characters, not in 2-byte units: wchar_t is not
+            // 2 bytes on every platform.
+            wbuffer[dataSize / sizeof(wchar_t)] = L'\0';
+            const size_t len = wcstombs(buffer, wbuffer, sizeof(buffer));
+            // (size_t)-1 signals an unconvertible character; a result that
+            // fills the buffer would leave it without a terminator.
+            if (len == static_cast<size_t>(-1) || len >= sizeof(buffer) - 1) {
+                assert(false && "app profile string too long");
+                break;
+            }
+            *(reinterpret_cast<std::string*>(entry->second.data_)) = buffer;
+            break;
+        }
+        default:
+            break;
+        }
+        valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
+    }
+
+    free(pProfile);
+    return true;
+}
+
 }
diff --git a/projects/clr/rocclr/runtime/device/appprofile.hpp b/projects/clr/rocclr/runtime/device/appprofile.hpp
index a2e70aa682..c2a04367fc 100644
--- a/projects/clr/rocclr/runtime/device/appprofile.hpp
+++ b/projects/clr/rocclr/runtime/device/appprofile.hpp
@@ -6,6 +6,7 @@
 
 #include "adl.h"
 
+#include <map>
 #include <string>
 
 namespace amd {
@@ -55,18 +56,35 @@ public:
     cl_device_type ApplyHsaDeviceHintFlag(const cl_device_type& type);
 
     bool IsHsaInitDisabled() { return noHsaInit_; }
-    
+    const std::string& GetBuildOptsAppend() const { return buildOptsAppend_; }
 protected:
+    enum DataTypes
+    {
+        DataType_Unknown = 0,
+        DataType_Boolean,
+        DataType_String,
+    };
+
+    struct PropertyData {
+        PropertyData(DataTypes type, void* data): type_(type), data_(data) {}
+        DataTypes type_; //!< Data type
+        void* data_; //!< Pointer to the data
+    };
+
+    typedef std::map<std::string, PropertyData> DataMap;
+
+    DataMap propertyDataMap_;
     std::string appFileName_; // without extension
     std::wstring wsAppFileName_;
 
-    virtual bool ParseApplicationProfile() { return true; }
+    virtual bool ParseApplicationProfile();
 
     cl_device_type hsaDeviceHint_; // valid values: CL_HSA_ENABLED_AMD
                                    // or CL_HSA_DISABLED_AMD
     bool gpuvmHighAddr_; // Currently not used.
     bool noHsaInit_; // Do not even initialize HSA.
     bool profileOverridesAllSettings_; // Overrides hint flags and env.var.
+    std::string buildOptsAppend_;
 };
 
 }
diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp
index fece9eba28..aa010a50de 100644
--- a/projects/clr/rocclr/runtime/device/device.cpp
+++ b/projects/clr/rocclr/runtime/device/device.cpp
@@ -51,6 +51,7 @@ namespace amd {
 std::vector<Device*> *Device::devices_ = NULL;
 bool Device::isHsaDeviceAvailable_ = false;
 bool Device::isGpuDeviceAvailable_ = false;
+AppProfile Device::appProfile_;
 
 #if defined(WITH_HSA_DEVICE)
 AppProfile* Device::oclhsaAppProfile_ = NULL;
@@ -150,6 +151,8 @@ Device::init()
     assert(!Runtime::initialized() && "initialize only once");
     bool ret = false;
     devices_ = NULL;
+    appProfile_.init();
+
 
     // IMPORTANT: Note that we are initialiing HSA stack first and then
     // GPU stack. The order of initialization is signiicant and if changed
diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp
index ba4eee7643..56a3c3606d 100644
--- a/projects/clr/rocclr/runtime/device/device.hpp
+++ b/projects/clr/rocclr/runtime/device/device.hpp
@@ -1675,6 +1675,9 @@ public:
     //! RTTI internal implementation
     virtual ObjectType objectType() const {return ObjectTypeDevice;}
 
+    //! Returns app profile
+    static const AppProfile* appProfile() {return &appProfile_;}
+
 protected:
     //! Enable the specified extension
     char* getExtensionString();
@@ -1683,6 +1686,7 @@ protected:
     device::Settings* settings_; //!< Device settings
     bool online_; //!< The device in online
     BlitProgram* blitProgram_; //!< Blit program info
+    static AppProfile appProfile_; //!< application profile
 
 private:
     bool IsHsaCapableDevice();
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp
index 0fb9575bd8..50118d91f9 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.cpp
@@ -21,59 +21,5 @@ AppProfile::AppProfile()
         PropertyData(DataType_Boolean, &reportAsOCL12Device_)));
 }
 
-bool AppProfile::ParseApplicationProfile()
-{
-    amd::ADL* adl = new amd::ADL;
-
-    if ((adl == NULL) || !adl->init()) {
-        delete adl;
-        return false;
-    }
-
-    ADLApplicationProfile* pProfile = NULL;
-
-    // Apply blb configurations
-    int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
-        adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
-        L"OCL", &pProfile);
-
-    delete adl;
-
-    if (pProfile == NULL) {
-        return false;
-    }
-
-    PropertyRecord* firstProperty = pProfile->record;
-    uint32_t valueOffset = 0;
-
-    for (int index = 0; index < pProfile->iCount; index++) {
-        PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>
-            ((reinterpret_cast<char*>(firstProperty)) + valueOffset);
-
-        // Get property name
-        char* propertyName = profileProperty->strName;
-        auto entry = propertyDataMap_.find(std::string(propertyName));
-        if (entry == propertyDataMap_.end()) {
-            // unexpected name
-            valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
-            continue;
-        }
-
-        // Get the property value
-        switch (entry->second.type_) {
-        case DataType_Boolean:
-            *(reinterpret_cast<bool*>(entry->second.data_)) =
-                profileProperty->uData[0] ? true : false;
-            break;
-        default:
-            break;
-        }
-        valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
-    }
-
-    free(pProfile);
-    return true;
-}
-
 
 }
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp
index 1a3ec5d1d6..348d847642 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuappprofile.hpp
@@ -19,26 +19,7 @@ public:
     bool enableHighPerformanceState() const { return enableHighPerformanceState_; }
     bool reportAsOCL12Device() const { return reportAsOCL12Device_; }
 
-protected:
-    //! parse application profile based on application file name
-    virtual bool ParseApplicationProfile();
-
 private:
-    enum DataTypes
-    {
-        DataType_Unknown = 0,
-        DataType_Boolean,
-    };
-
-    struct PropertyData {
-        PropertyData(DataTypes type, void* data): type_(type), data_(data) {}
-        DataTypes type_; //!< Data type
-        void* data_; //!< Pointer to the data
-    };
-
-    typedef std::map<std::string, PropertyData> DataMap;
-
-    DataMap propertyDataMap_;
     bool enableHighPerformanceState_;
     bool reportAsOCL12Device_;
 
diff --git a/projects/clr/rocclr/runtime/platform/program.cpp b/projects/clr/rocclr/runtime/platform/program.cpp
index 52e951511c..fdd969bee6 100644
--- a/projects/clr/rocclr/runtime/platform/program.cpp
+++ b/projects/clr/rocclr/runtime/platform/program.cpp
@@ -1,7 +1,8 @@
 //
 // Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
 //
-
+#include "top.hpp"
+#include "device/appprofile.hpp"
 #include "platform/program.hpp"
 #include "platform/context.hpp"
 #include "utils/options.hpp"
@@ -174,6 +175,10 @@ Program::compile(
         // Override options.
         cppstr = AMD_OCL_BUILD_OPTIONS;
     }
+    if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
+        cppstr.append(" ");
+        cppstr.append(Device::appProfile()->GetBuildOptsAppend());
+    }
     if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
         cppstr.append(" ");
         cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
@@ -426,6 +431,10 @@ Program::build(
         // Override options.
         cppstr = AMD_OCL_BUILD_OPTIONS;
     }
+    if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
+        cppstr.append(" ");
+        cppstr.append(Device::appProfile()->GetBuildOptsAppend());
+    }
     if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
         cppstr.append(" ");
         cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);
@@ -543,6 +552,10 @@ Program::buildNoOpt(const Device& device, const std::string& kernelName)
         // Override options.
         cppstr = AMD_OCL_BUILD_OPTIONS;
     }
+    if (!Device::appProfile()->GetBuildOptsAppend().empty()) {
+        cppstr.append(" ");
+        cppstr.append(Device::appProfile()->GetBuildOptsAppend());
+    }
     if (AMD_OCL_BUILD_OPTIONS_APPEND != NULL) {
         cppstr.append(" ");
         cppstr.append(AMD_OCL_BUILD_OPTIONS_APPEND);