From 239faab75e4eb00ef069d41df15be6b52ee90413 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 23 Sep 2014 12:44:50 -0400
Subject: [PATCH] P4 to Git Change 1079952 by yaxunl@yaxunl_stg_win50 on
2014/09/23 12:31:16
ECR #377625 - Workaround for Blender performance issue. Lower available VGPRs to improve waves per CU.
Added BuildOptsAppend to OCL app profile.
Read BuildOptsAppend and append to build options.
Added specific wave optimization option for Blender.
Affected files ...
... //depot/stg/opencl/drivers/opencl/appprofiles/oclappprofile.xml#7 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#170 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#230 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#63 edit
[ROCm/clr commit: 16f8ca9aae0342269f21767b9e0a3022643e3ccd]
---
.../clr/rocclr/compiler/lib/utils/OPTIONS.def | 8 +++
.../clr/rocclr/runtime/device/appprofile.cpp | 70 +++++++++++++++++++
.../clr/rocclr/runtime/device/appprofile.hpp | 22 +++++-
projects/clr/rocclr/runtime/device/device.cpp | 3 +
projects/clr/rocclr/runtime/device/device.hpp | 4 ++
.../runtime/device/gpu/gpuappprofile.cpp | 54 --------------
.../runtime/device/gpu/gpuappprofile.hpp | 19 -----
.../clr/rocclr/runtime/platform/program.cpp | 15 +++-
8 files changed, 119 insertions(+), 76 deletions(-)
diff --git a/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def b/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def
index 385727cec0..83d0fd0bde 100644
--- a/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def
+++ b/projects/clr/rocclr/compiler/lib/utils/OPTIONS.def
@@ -809,6 +809,14 @@ OPTION(OT_UINT32, \
100000, 0, 0xFFFFFFFF, NULL, \
"Set kernel size threshold for inliner (default 200000).")
+// -wokth=int or --waves-opt-kernel-threshold (default 0xFFFFFFFF per the value field below; NOTE(review): confirm field order)
+OPTION(OT_UINT32, \
+ OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_REQUIRED|OA_SEPARATOR_EQUAL, \
+ "wokth", "waves-opt-kernel-threshold", \
+ WavesOptKernelThreshold, \
+ 0xFFFFFFFF, 0, 0xFFFFFFFF, NULL, \
+ "Enable waves optimization when kernel size is greater than this threshold.")
+
// -fdef-res-id -fno-def-res-id
OPTION(OT_BOOL, \
OA_LINK_EXE|OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED|OFA_PREFIX_F, \
diff --git a/projects/clr/rocclr/runtime/device/appprofile.cpp b/projects/clr/rocclr/runtime/device/appprofile.cpp
index c7e2fc1fcd..53b94ffbbf 100644
--- a/projects/clr/rocclr/runtime/device/appprofile.cpp
+++ b/projects/clr/rocclr/runtime/device/appprofile.cpp
@@ -6,6 +6,7 @@
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "appprofile.hpp"
+#include <cstdlib>
static void* __stdcall adlMallocCallback(int n)
{
@@ -91,6 +92,8 @@ AppProfile::AppProfile(): hsaDeviceHint_(0),
profileOverridesAllSettings_(false)
{
appFileName_ = amd::Os::getAppFileName();
+ propertyDataMap_.insert(DataMap::value_type("BuildOptsAppend",
+ PropertyData(DataType_String, &buildOptsAppend_)));
}
AppProfile::~AppProfile()
@@ -146,4 +149,71 @@ cl_device_type AppProfile::ApplyHsaDeviceHintFlag(const cl_device_type& type)
return type;
}
+// Searches ADL for the "OCL" application profile of wsAppFileName_ and copies
+// each property registered in propertyDataMap_ into the member bound to it.
+// Returns false when ADL fails to initialize or no profile is returned.
+bool AppProfile::ParseApplicationProfile()
+{
+    amd::ADL* adl = new amd::ADL;
+    if ((adl == NULL) || !adl->init()) {
+        delete adl;
+        return false;
+    }
+
+    // Apply blb configurations; the status code is ignored, pProfile decides.
+    ADLApplicationProfile* pProfile = NULL;
+    adl->adl2ApplicationProfilesProfileOfApplicationx2Search(
+        adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL,
+        L"OCL", &pProfile);
+    delete adl;
+    if (pProfile == NULL) {
+        return false;
+    }
+
+    PropertyRecord* firstProperty = pProfile->record;
+    uint32_t valueOffset = 0;
+    const int BUFSIZE = 1024;
+    wchar_t wbuffer[BUFSIZE];
+    char buffer[2 * BUFSIZE];
+
+    for (int index = 0; index < pProfile->iCount; index++) {
+        // Records are packed back to back, valueOffset bytes past the first.
+        PropertyRecord* profileProperty = reinterpret_cast<PropertyRecord*>(
+            reinterpret_cast<char*>(firstProperty) + valueOffset);
+        const size_t dataSize = static_cast<size_t>(profileProperty->iDataSize);
+        // NOTE(review): the -4 presumably discounts uData's placeholder bytes.
+        valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4);
+        auto entry = propertyDataMap_.find(std::string(profileProperty->strName));
+        if (entry == propertyDataMap_.end()) {
+            continue;  // unexpected name
+        }
+        switch (entry->second.type_) {
+        case DataType_Boolean:
+            *(reinterpret_cast<bool*>(entry->second.data_)) =
+                profileProperty->uData[0] ? true : false;
+            break;
+        case DataType_String: {
+            // Checked at run time: assert() vanishes in release builds, where
+            // an oversized value would overrun the stack buffers.
+            if (dataSize > sizeof(wbuffer) - sizeof(wchar_t)) {
+                break;  // app profile string too long
+            }
+            memcpy(wbuffer, profileProperty->uData, dataSize);
+            wbuffer[dataSize / sizeof(wchar_t)] = L'\0';
+            // wcstombs returns (size_t)-1 on failure and leaves the output
+            // unterminated when it fills the buffer; reject both cases.
+            size_t len = wcstombs(buffer, wbuffer, sizeof(buffer));
+            if (len < sizeof(buffer)) {
+                *(reinterpret_cast<std::string*>(entry->second.data_)) = buffer;
+            }
+            break;
+        }
+        default:
+            break;
+        }
+    }
+    free(pProfile);
+    return true;
+}
+
}
diff --git a/projects/clr/rocclr/runtime/device/appprofile.hpp b/projects/clr/rocclr/runtime/device/appprofile.hpp
index a2e70aa682..c2a04367fc 100644
--- a/projects/clr/rocclr/runtime/device/appprofile.hpp
+++ b/projects/clr/rocclr/runtime/device/appprofile.hpp
@@ -6,6 +6,7 @@
#include "adl.h"
+#include <map>