From f0cb06f99b56cc755c006bb33aef0e9fd394d986 Mon Sep 17 00:00:00 2001 From: "Andrey Kasaurov (xN/A) SP" Date: Thu, 2 Jul 2015 10:27:31 -0500 Subject: [PATCH] Fix HSA Finalizer including Cherry pick of CL#1166690 from SC Stg and update for RT samples. It contains: Change 1166280 on 2015/06/30 by bolek@bolek-common2 EPR #092474 - Fix missing integrations Change 1164156 on 2015-06-23 by nhaustov ECR #010005 - HSA Finalizer: Add missing tests. Change 1164232 on 2015-06-23 by nhaustov ECR #333756 - HSA Finalizer: Implement reading of notes. Change 1166268 on 2015/06/30 by bolek@bolek-laser EPR #010001 - Promotion of the Shader Compiler (SC) Release SC Library version 0001.IL01-02.0339 Date: June 30, 2015 Changelist (stg/sc): 1165197 Change 1163976 on 2015-06-22 by sashao EPR #373149 - OpenGL ES 3.0 Development Change 1164122 on 2015-06-23 by nhaustov ECR #033756 - HSA Finalizer: Fix compilation warnings. Change 1164150 on 2015-06-23 by efinger EPR #092474 - bugzilla 10829 - optimize out useless V_PERM_B32 feeding packed math op, in early expansion, rather than late expansion, and do it regardless of whether the packed op will be split or not. Change 1164187 on 2015-06-23 by efinger EPR #092474 - Add and use GetUAVInfo() and GetNumUAVs() Change 1164194 on 2015-06-23 by rgottlie EPR #092474 - Fix Linux Build Issue for SC_OPEN_SOURCE Change 1164204 on 2015-06-23 by mbedy EPR #092474 - Update Open Source build - now working from SC stg. Change 1164216 on 2015-06-23 by rouellet EPR #092474 - Add directive to do what -il_interpreter does. Change 1164232 on 2015-06-23 by nhaustov ECR #333756 - HSA Finalizer: Implement reading of notes. Change 1164239 on 2015-06-23 by nhaustov ECR #333756 - HSA Finalizer: Fix OpenCL build problem. Change 1164275 on 2015-06-23 by nhaustov ECR #333756 - HSA Finalizer: Fix Linux build errors. 
Change 1164365 on 2015-06-23 by efinger EPR #092474 - Cleanup UAV Atomic handling Change 1164393 on 2015-06-23 by kzhuravl EPR #333756 - Finalizer/Loader fixes Change 1164654 on 2015-06-24 by dpreobra ECR #333753 - HSA HLC: SPB_ASM: TestGen improvements Change 1164727 on 2015-06-24 by bolek EPR #092474 - Enable function level linking, COMDAT folding and unused function removal optimizations in Dev release builds. This saves about 2.6MB in code size on 64-bit Dev. Change 1164760 on 2015-06-24 by rgottlie EPR #422210 - Fix problem with TransformScratch heuristics Change 1164761 on 2015-06-24 by rgottlie EPR #422181 - Fix handling of sub-dword load instructions in propagation of immediates from store to load in RefineMemory Change 1164764 on 2015-06-24 by bolek EPR #092474 - Add missing const Change 1164769 on 2015-06-24 by efinger EPR #092474 - Cleanup GDS atomics Change 1164776 on 2015-06-24 by efinger EPR #092474 - Fix linux build Change 1164799 on 2015-06-24 by mbedy EPR #092474 - Improve alignment for 2 DWORD instructions by more closely Change 1164803 on 2015-06-24 by efinger EPR #092474 - Open Source Cleanup Change 1164809 on 2015-06-24 by bfavela EPR #092474 - Escape an infinite loop in shader during the build of a DAG when a block is visited twice Change 1164814 on 2015-06-24 by bolek EPR #092474 - Add Dev command line option to disable individual peephole patterns (blame Chris for this one). Change 1164827 on 2015-06-24 by bfavela EPR #092474 - Adding small change to CL 1164809 as suggested by creeve to remove superfluous if() Change 1164842 on 2015-06-24 by gujin EPR #092474 - Prevent moving exit-loop checking to the end of loop if there is a branch in the loop that is optimized with a target replacement bypassing the loop end. This is to fix an OpenGL hull shader conformance test fail (bug 10859). 
Change 1164876 on 2015-06-24 by rgottlie EPR #092474 - Only allow memory merging if no memory scope or order is specified Change 1164883 on 2015-06-24 by kdintino EPR #092474 - Add HSAIL files to the AMD -> LLVM copyright replacement loop. Change 1165060 on 2015-06-25 by efinger EPR #092474 - Open Source Cleanup - Copyright Change 1165077 on 2015-06-25 by efinger EPR #092474 - Cleanup LDS atomics - part 1 (groundwork) Change 1165080 on 2015-06-25 by bfavela EPR #092474 - Extension to SUPPRESS_PI_REDUCE_F32 for TAN (TAN_F16 is already handled by expansion) Change 1165189 on 2015-06-25 by efinger EPR #092474 - Cleanup LDS atomics - part 2 Change 1165196 on 2015-06-25 by bolek EPR #092474 - Add syntax to the peephole pattern language to specify SCInst flag values or wildcards. Change 1165197 on 2015-06-25 by bolek EPR #092474 - Allow the MulAddToMadF peephole pattern to modify instructions marked as invariant (result should still be the same) Change 1165438 on 2015/06/26 by bolek@bolek-common2 EPR #010001 - Promotion of the Shader Compiler (SC) Release SC Library version 0001.IL01-02.0338 Date: June 26, 2015 Changelist (stg/sc): 1163954 Change 1161629 on 2015-06-15 by efinger EPR #092474 - Move CFG::IL2IRProcessDeclare() to global scope Change 1161633 on 2015-06-15 by rouellet EPR #092474 - Bugzilla 10852 call ConverInstFields when translating COND_MOVE. Change 1161643 on 2015-06-15 by rgottlie EPR #092474 - Handle manually inserted wait state for SALU writing M0 followed by VINTERP Change 1161718 on 2015-06-15 by lifpan EPR #092474 - The "point size" in copy shader of GS Change 1161721 on 2015-06-15 by xlji EPR #092474 - Split DIV_F16 and DIV_PRECISE_F16 Change 1161850 on 2015-06-16 by kzhuravl EPR #333756 - Change a few function names, general cleanup (no functional change) Change 1161934 on 2015-06-16 by efinger EPR #092474 - Fix linux compile warnings Change 1161946 on 2015-06-16 by nhaustov ECR #333756 - HSA Finalizer: Fix Linux build warnings. 
Change 1161981 on 2015-06-16 by efinger EPR #092474 - Open Source Cleanup Change 1161991 on 2015-06-16 by efinger EPR #092474 - Move CFG::IL2IRProcessSpecial() to global scope Change 1161997 on 2015-06-16 by rgottlie EPR #092474 - Fix compile warnings under Linux Change 1162001 on 2015-06-16 by efinger EPR #092474 - Fix linux build Change 1162045 on 2015-06-16 by mherdeg EPR #092474 - Comment out unused functions to fix linux compiler warnings. Change 1162048 on 2015-06-16 by akasauro EPR #092474 - SC: Some AMD OCL SDK tests (including BinomialOption) assert in SCInst.cpp. [on behalf of Atrem Tamazov] Change 1162061 on 2015-06-16 by efinger EPR #092474 - Rename NewIRInst to MakeIRInst and drop last (unused) arg. Change 1162066 on 2015-06-16 by creeve EPR #092474 - Linux build fixes for open source. Change 1162067 on 2015-06-16 by creeve EPR #092474 - Improve hash table grow and sanitize. Change 1162072 on 2015-06-16 by creeve EPR #092474 - Peephole |x| * |x| => x*x Change 1162089 on 2015-06-16 by chfang EPR #092474 - Fix linux compiler warnings in SCStructureAnalyzer.cpp. 
Change 1162145 on 2015-06-16 by efinger EPR #092474 - Improve interface to MakeInstOp[123] Change 1162427 on 2015-06-17 by efinger EPR #092474 - bugzilla 10862 - Back out changelist 1161549 Change 1162434 on 2015-06-17 by rgottlie EPR #092474 - Only dump individual functions in each pass of Refine Memory Change 1162436 on 2015-06-17 by kzhuravl EPR #333756 - Integrate runtime independent loader from stg hsa + update project files Change 1162442 on 2015-06-17 by efinger EPR #092474 - Add and use CreateRegTemp() Change 1162505 on 2015-06-17 by skolton ECR #333756 - HSA Finalizer: Doorbell signals support Change 1162527 on 2015-06-17 by kzhuravl EPR #333756 - Always set dx10_clamp to true for hsa Change 1162531 on 2015-06-17 by efinger EPR #092474 - Fix linux compile warnings Change 1162568 on 2015-06-17 by mbedy EPR #092474 - Specify a newer DX9 SDK for SCDevUtil that correctly links with WDK n10136. Change 1162623 on 2015-06-17 by mherdeg EPR #092474 - Remove duplicate #include "SCHSAInterface.h". It confuses Intellisense in Visual Studio. Change 1162905 on 2015-06-18 by rgottlie EPR #092474 - Fix Linux Build Warnings Change 1162930 on 2015-06-18 by nhaustov ECR #333756 - HSA Finalizer: Cleanup amdhsafin command-line tool. Change 1162938 on 2015-06-18 by nhaustov ECR #333756 - HSA Finalizer: Fix build problem. Change 1162944 on 2015-06-18 by rgottlie EPR #092474 - Clean up bug descriptions as per Phil's suggestion Change 1162951 on 2015-06-18 by skolton ECR #333756 - HSA Finalizer: Bug fix for 1DB query image Change 1163009 on 2015-06-18 by nhaustov ECR #333756 - HSA Finalizer: build amdhsafin with WITH_LIBBRIGDWARF when needed. Change 1163263 on 2015-06-19 by nhaustov ECR #092474 - Fix patgen VS build by quoting %TMPDIR%. Change 1163265 on 2015-06-19 by skolton ECR #333756 - HSA Finalizer: Fix for doorbell signal store. Change 1163310 on 2015-06-19 by nhaustov ECR #333756 - HSA Finalizer: Introduce separate amdhsacode library. 
Change 1163316 on 2015-06-19 by nhaustov ECR #333756 - HSA Finalizer: Fix OpenCL build problem. Change 1163320 on 2015-06-19 by nhaustov ECR #333756 - HSA Finalizer: Fix another OpenCL build problem. Change 1163331 on 2015-06-19 by mjared EPR #092474 - Replace asin/acos 5th order minimax polynomial with a 6th order double locked (at 0 and 1) minimax polynomial Change 1163353 on 2015-06-19 by efinger EPR #092474 - Use normal temps (not expansion temps) for expansion template T regs. Change 1163473 on 2015-06-19 by mjared EPR #092474 - Improve accuracy of ATAN instruction by replacing rational approximation with a 17th order double locked minimax polynomial. Also increase degree of ASIN/ACOS double locked minimax polynomial to 7. Change 1163475 on 2015-06-19 by creeve EPR #092474 - Avoid putting partial write on export instruction. This feature existed before but only occurred if the output was point sprite. This change removed that restriction. Also fixed the implementation of //EsMode and //LsMode shader directi Change 1163481 on 2015-06-19 by mjared EPR #092747 - Misc. python scripts for working with transcendental functions. Includes fast implementation of remez minimax algorithm for absolute error and slower optimization-based remez for weighted/custom error reduction. Change 1163528 on 2015-06-19 by creeve EPR #092474 - Fix build issue. Change 1163603 on 2015-06-21 by bolek EPR #092474 - patgen makefile cleanup Change 1163614 on 2015-06-21 by kzhuravl EPR #333756 - Integrate runtime independent loader changes from stg hsa Change 1163699 on 2015-06-22 by rouellet EPR #092474 - bugzilla 10854 Get cb0[1] initialized with group dimensions for compute shaders on r800 and newer. Make IL and HW interpreter details and variable names more closely match. 
Flush denorms when doing cube mapped samples (the cb0[1] init Change 1163713 on 2015-06-22 by efinger EPR #092474 - Convert all usage of expansion temps to regular temps Change 1163718 on 2015-06-22 by nhaustov ECR #333756 - HSA Finalizer: Add loader (-loader option) to amdhsafin and update tests. Change 1163732 on 2015-06-22 by nhaustov ECR #333756 - HSA Finalizer: Implement images in amdhsafin loader and update tests. Change 1163774 on 2015-06-22 by mbedy EPR #092474 - Strip _DEV macros from open source. Fix issue in ifdef stripping. Change 1163786 on 2015-06-22 by mbedy EPR #092474 - Revert unintentionally submitted change. Change 1163803 on 2015-06-22 by bolek EPR #092474 - Peephole compile-time performance improvements Change 1163832 on 2015-06-22 by efinger EPR #092474 - bugzilla 10849 - fix copy propagation bug with SDWA Change 1163916 on 2015-06-22 by efinger EPR #092474 - Nuke support for expansion temps Change 1163954 on 2015-06-22 by creeve EPR #092474 - More code sanitization. 
Change 1164740 on 2015/06/24 by vpykhtin@vpykhtin-SC ECR #333753 - Cherrypicking CL1164641 from stg/sc (that is a cherrypick of CL1164640 from stg/opencl) Testing: TC PSDB [git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1167011] [ROCm/ROCR-Runtime commit: 3bbf3c6a8b29985687e03701d81997aa646979df] --- projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp | 2 ++ projects/rocr-runtime/samples/common/utilities.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp index 88fc06c87d..0f9a41c8cb 100755 --- a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp +++ b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp @@ -42,6 +42,7 @@ typedef uint8_t BrigLinkage8_t; typedef uint8_t BrigExecutableModifier8_t; typedef BrigDataOffset32_t BrigDataOffsetString32_t; +/* enum BrigKinds { BRIG_KIND_NONE = 0x0000, BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, @@ -80,6 +81,7 @@ struct BrigData { uint32_t byteCount; uint8_t bytes[1]; }; +*/ // Provide access to command line arguments passed in by user extern uint32_t hsa_cmdline_arg_cnt; diff --git a/projects/rocr-runtime/samples/common/utilities.h b/projects/rocr-runtime/samples/common/utilities.h index 4238efd61e..49e2b2500d 100644 --- a/projects/rocr-runtime/samples/common/utilities.h +++ b/projects/rocr-runtime/samples/common/utilities.h @@ -66,6 +66,7 @@ typedef uint8_t BrigExecutableModifier8_t; typedef BrigDataOffset32_t BrigDataOffsetString32_t; +/* enum BrigKinds { BRIG_KIND_NONE = 0x0000, BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, @@ -104,6 +105,7 @@ struct BrigData { uint32_t byteCount; uint8_t bytes[1]; }; +*/ struct float2 {