From 520cfc439dbf3cc47fe5446d5334e1d3a13bb5de Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 25 Nov 2015 15:23:51 -0500 Subject: [PATCH] P4 to Git Change 1214781 by smekhano@stas-rampitec-hsa on 2015/11/25 14:45:09 SWDEV-82596 - HSA HLC: Create AMDInline pass The generic llvm inlining heuristics do not work well for GPU. In particular we have a common problem in several tests: If we have a pointer to a private array passed into a function it will not be optimized out, leaving scratch usage. The pass increases the inline threshold to allow inlining in this case. It also lets us move at least some AMD inlining customizations into this file from the common code. Inline hint threshold is moved in this change. Performance impact on ocltst sha256, 32 bit, Fiji: AMDIL HSAIL Diff HSAIL+Inliner Diff Diff before to AMDIL to HSAIL to AMDIL OCLPerfSHA256[ 0] 43.843 40.894 0.93 69.910 1.71 1.59 OCLPerfSHA256[ 1] 53.611 51.083 0.95 80.919 1.58 1.51 OCLPerfSHA256[ 2] 52.127 51.528 0.99 80.640 1.56 1.55 OCLPerfSHA256[ 3] 60.952 57.027 0.94 68.615 1.20 1.13 OCLPerfSHA256[ 4] 76.173 70.150 0.92 80.582 1.15 1.06 OCLPerfSHA256[ 5] 75.886 70.264 0.93 81.000 1.15 1.07 Testing: smoke, precheckin, ocltst sha256 Reviewed by Danill Fukalov Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#28 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/InitializePasses.h#93 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/LinkAllPasses.h#49 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/Transforms/IPO.h#32 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Transforms/IPO/AMDInline.cpp#1 add ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Transforms/IPO/CMakeLists.txt#24 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Transforms/IPO/IPO.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Transforms/IPO/Inliner.cpp#42 edit ... 
//depot/stg/opencl/drivers/opencl/compiler/llvm/tools/opt/amdopt.inc#28 edit [ROCm/clr commit: 5e3d4f5a01327e9f02954c9c12154fd8512024b6] --- .../clr/rocclr/compiler/lib/backends/common/opt_level.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/clr/rocclr/compiler/lib/backends/common/opt_level.cpp b/projects/clr/rocclr/compiler/lib/backends/common/opt_level.cpp index c755e00c6f..fc7af4875a 100644 --- a/projects/clr/rocclr/compiler/lib/backends/common/opt_level.cpp +++ b/projects/clr/rocclr/compiler/lib/backends/common/opt_level.cpp @@ -58,14 +58,14 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel) if (HLC_Force_Always_Inliner_Pass) { Builder.Inliner = createAlwaysInlinerPass(); } else { - Builder.Inliner = createFunctionInliningPass(HLC_HSAIL_Inline_Threshold); + Builder.Inliner = createAMDFunctionInliningPass(HLC_HSAIL_Inline_Threshold); } } else #endif { HLC_Disable_Amd_Inline_All = false; // Always create Inliner regardless of OptLevel - Builder.Inliner = createFunctionInliningPass(500); + Builder.Inliner = createAMDFunctionInliningPass(500); } } else if (OptLevel > 1) { unsigned Threshold = 225; @@ -76,12 +76,12 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel) // Don't do inlining (including createAlwaysInlinerPass()) if OptimizationLevel // is zero becaue we are generating code for -g if (OptLevel > 0) { - Builder.Inliner = createFunctionInliningPass(Threshold); + Builder.Inliner = createAMDFunctionInliningPass(Threshold); } } else #endif { - Builder.Inliner = createFunctionInliningPass(Threshold); + Builder.Inliner = createAMDFunctionInliningPass(Threshold); } } Builder.SizeLevel = 0;