Files
rocm-systems/rocclr/runtime/device/gpu/gpuprogram.cpp
T
foreman 4f098e2a50 P4 to Git Change 1061423 by nhaustov@nhaustov_hsa on 2014/07/31 04:34:11
ECR #010005 - Update to HSAIL PRM 1.0p. [ opencl changes ]

	Changes by Oleg Semenov, Dmitry Preobrazhensky, Valery Pykhtin.

	Testing: full CQE on multiple ASICs, smoke, finalizer_features, regression, OCL conformance 1.2 (offline).

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILAsm/HSAILAsm.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILAsm/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILAsm/build/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILAsm/build/Makefile.hsailasm#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGen.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGen.vcxproj#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenBackend.cpp#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenBackend.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenBackendEml.h#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenBackendLua.h#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenBrigContext.cpp#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenBrigContext.h#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenContext.cpp#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenContext.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenDataProvider.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenDataProvider.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenDump.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenEmulator.cpp#14 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenEmulator.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenFilter.h#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenManager.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenNavigator.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenNavigator.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenProp.cpp#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenProp.h#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenProvider.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenSample.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenSample.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenTestData.h#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenTestDesc.cpp#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenTestDesc.h#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenUtilities.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenUtilities.h#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenVal.cpp#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/HSAILTestGenVal.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/build/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/HSAILTestGen/build/Makefile.hsailtestgen#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/Makefile#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/asmdefs#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/asmlibs#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/asmrules#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/htdefs#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/htlibs#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/htrules#1 branch
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/BrigDwarfGenerator.cpp#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/BrigDwarfGenerator.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/Makefile#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/SectionHeaderTable.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/build/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/build/Makefile.libbrigdwarf#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/hsa_dwarf.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libBRIGdwarf.sln#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libBRIGdwarf.vcxproj#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libbrigdwarfdefs#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/build/Makefile#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/build/Makefile.libdwarf#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/libdwarf.vcxproj#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/libdwarfdefs#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/CHANGES#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/CODINGSTYLE#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/COPYING#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/ChangeLog#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/ChangeLog2006#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/ChangeLog2007#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/ChangeLog2008#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/ChangeLog2009#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/ChangeLog2010#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/LGPL.txt#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/LIBDWARFCOPYRIGHT#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/NEWS#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/README#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/cmplrs/dwarf_addr_finder.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/common.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/common.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/config.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_abbrev.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_abbrev.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_addr_finder.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_alloc.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_alloc.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_arange.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_arange.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_base_types.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_die_deliv.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_die_deliv.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_elf_access.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_elf_access.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_error.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_error.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_form.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_frame.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_frame.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_frame2.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_frame3.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_funcs.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_funcs.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_global.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_global.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_harmless.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_harmless.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_incl.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_init_finish.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_leb.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_line.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_line.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_line2.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_loc.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_loc.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_macro.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_macro.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_names.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_names.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_names_enum.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_names_new.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_opaque.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_original_elf_init.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_print_lines.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_pubtypes.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_query.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_ranges.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_sort_line.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_string.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_stubs.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_types.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_types.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_util.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_util.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_vars.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_vars.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_weaks.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/dwarf_weaks.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/libdwarf.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/libdwarfdefs.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/malloc_check.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/malloc_check.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_alloc.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_alloc.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_arange.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_arange.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_die.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_die.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_encode_nm.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_encode_nm.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_error.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_error.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_expr.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_expr.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_finish.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_forms.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_frame.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_frame.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_funcs.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_incl.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_init.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_line.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_line.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_macinfo.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_macinfo.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_opaque.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_pubnames.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_reloc.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_reloc.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_reloc_stream.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_reloc_stream.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_reloc_symbolic.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_reloc_symbolic.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_section.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_section.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_types.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_types.h#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_util.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_vars.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libdwarf/src/pro_weaks.c#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libelf/libelf.vcxproj#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libBRIGdwarf/libelf/libelf.vcxproj.filters#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/Android.mk#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/Brig.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/Brig_new.hpp#10 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HDLProcessor.pl#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigContainer.cpp#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigContainer.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigInstr.hdl#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigObjectFile.cpp#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigObjectFile.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigantine.cpp#9 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILBrigantine.h#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILConvertors.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILDisassembler.cpp#10 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILDisassembler.h#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILDump.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILDump.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILFloats.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILFloats.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILInstProps.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILItemBase.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILItems.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILItems.h#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILParser.cpp#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILParser.h#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILSRef.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILScanner.cpp#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILScanner.h#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILScannerRules.cpp#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILScannerRules.re2c#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILScope.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILTypeUtilities.h#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILUtilities.cpp#9 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILUtilities.h#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILValidator.cpp#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILValidator.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILValidatorBase.cpp#9 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILValidatorBase.h#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/HSAILb128_t.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/Makefile#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/build/Makefile#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/build/Makefile.libhsail#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/doxy.cfg#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/generate.pl#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/generate.targets#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/hsail_c.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/hsail_c.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/libHSAIL/libHSAIL.vcxproj#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/hsail-tools/mkmk.pl#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/brig_loader.cpp#12 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.cpp#29 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.hpp#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Interface/SCCommon.h#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Interface/SCHSAInterface.cpp#2 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Interface/SCInterface.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/CompilerExternal.cpp#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/Dev/dev.cpp#14 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/BrigDev.cpp#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/BrigDev.hpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/BrigIR.hpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILArithmetic.cpp#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILCodeGen.cpp#10 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILConversions.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILFinalizer.cpp#19 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILFinalizer.hpp#13 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILFunctions.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILGCN.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILImages.cpp#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILMemory.cpp#10 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSAILSpecial.cpp#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSASymbol.cpp#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc-hsa/Src/hsail-fin/HSASymbol.hpp#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc/Interface/SCCommon.h#37 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/sc/Interface/SCInterface.h#12 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#42 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.h#16 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/build/Makefile.scwrapper#25 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/scCompileBase.cpp#37 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/scCompileBase.h#21 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclEnums.h#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_9/aclTypes.h#6 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/bif/bifbase.cpp#49 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/bif/bifinternal.hpp#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/bif_section_labels.hpp#17 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/BRIGAsmPrinter.cpp#63 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/BRIGAsmPrinter.h#27 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILControlFlow.td#5 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILISelDAGToDAG.cpp#21 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILISelLowering.cpp#52 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILImages.td#7 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILIntrinsics.td#33 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILKernelManager.cpp#21 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILKernelManager.h#8 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILSpecial.td#12 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILUtilityFunctions.cpp#27 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILUtilityFunctions.h#18 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Transforms/Scalar/AMDLowerAtomics.cpp#12 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/utils/TableGen/AMDBRIGLoweringEmitter.cpp#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/utils/TableGen/AMDDAGWalker.cpp#15 integrate
... //depot/stg/opencl/drivers/opencl/compiler/llvm/utils/TableGen/AMDDAGWalker.h#12 integrate
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/loader.cpp#11 integrate
... //depot/stg/opencl/drivers/opencl/compiler/tools/aoc2/aoc2.cpp#56 integrate
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/image/get.cl#8 integrate
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/misc/atomicWorkItemFence.cl#5 integrate
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#171 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.cpp#36 integrate
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsaprogram.cpp#5 integrate
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLEnumCheck.cpp#35 integrate
2014-07-31 04:43:02 -04:00

2231 lines
79 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "device/gpu/gpuprogram.hpp"
#include "device/gpu/gpublit.hpp"
#include "macrodata.h"
#include "MDParser/AMDILMDInterface.h"
#include <fstream>
#include <sstream>
#include <cstdio>
#include "utils/options.hpp"
#include "newcore.h"
// Declaration of the HSA loader entry point (C linkage). Its definition is
// not visible in this part of the file.
//
// Parameters, as far as the signature shows:
//   compiler_handle         - compiler instance handle
//   bin                     - binary handed to the loader
//   userData                - opaque pointer forwarded to the callbacks that
//                             take a userData argument
//   allocateGPUMemory       - callback receiving a size and an out-pointer for
//                             a 64-bit GPU memory value
//   DmaMemoryCopy           - callback receiving an offset, a source pointer
//                             and a size; returns bool
//   getSamplerObjectParam   - callback reporting a size and an alignment
//   initializeSamplerObject - callback receiving an offset, an unnormalize
//                             flag, a filter and three address-mode values
//
// NOTE(review): the callback signatures match the free functions defined
// right below (AllocateGPUMemory, DmaMemoryCopy, GetSamplerObjectParams,
// InitializeSamplerObject); presumably those are what gets passed in -
// confirm at the call site.
extern "C" bool
ACL_API_ENTRY aclHsaLoader(
aclCompiler* compiler_handle,
aclBinary* bin,
void* userData,
void (*allocateGPUMemory)(void* userData, size_t size, uint64_t* GPUMemory),
bool (*DmaMemoryCopy)(void* userData, uint64_t offset, const void* pSrc, size_t size),
void (*getSamplerObjectParam)(uint32_t* size, uint32_t* alignment),
void (*initializeSamplerObject)(void* userData, uint64_t offset, bool unnormalize,
uint8_t fltr, uint8_t addrU, uint8_t addrV, uint8_t addrW));
// Copies host data into, or zero-fills, a range of the program's global store.
//
//   userData - opaque pointer, reinterpreted as the owning gpu::HSAILProgram
//   offset   - start of the destination range; used as the transfer origin
//   pSrc     - source bytes; when NULL the range is filled with zero bytes
//   size     - requested number of bytes
//
// The transferred length is limited to the bytes that remain between 'offset'
// and the program's recorded global-variable total size, so a request can
// never reach past that size (it is silently truncated instead).
//
// Returns false when the program has no global store or when 'offset' lies
// beyond the recorded total size; otherwise returns the transfer manager's
// result.
bool
DmaMemoryCopy(void* userData, uint64_t offset, const void* pSrc, size_t size)
{
    gpu::HSAILProgram* prog = reinterpret_cast<gpu::HSAILProgram*>(userData);
    gpu::Memory* mem = const_cast<gpu::Memory*>(prog->globalStore());
    if (mem == NULL) {
        return false;
    }

    // Bound the request by what is left after 'offset', not by the total
    // size alone; otherwise offset + size could run past the recorded size.
    const size_t totalSize = prog->globalVariableTotalSize();
    if (offset > totalSize) {
        return false;
    }
    const size_t available = totalSize - static_cast<size_t>(offset);
    const size_t copySize = (size < available) ? size : available;

    amd::Coord3D origin(offset);
    amd::Coord3D region(copySize);

    // memcpy mode
    if (pSrc) {
        // NOTE(review): 'Entire' is passed as true even when only part of the
        // store is written - confirm against writeBuffer's contract.
        const bool Entire = true;
        return prog->dev().xferMgr().writeBuffer(pSrc, *mem, origin, region, Entire);
    }

    // memset mode
    char pattern = 0;
    return prog->dev().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern),
        origin, region);
}
// Allocates the memory object that backs the program's global store.
//
//   userData  - opaque pointer, reinterpreted as the owning gpu::HSAILProgram
//   size      - requested size; the allocation itself is rounded up to a
//               multiple of gpu::ConstBuffer::VectorSize
//   GPUMemory - out: the memory object's vmAddress() on success, 0 on failure
//
// On success the memory object is handed to the program via setGlobalStore()
// and the unrounded 'size' is recorded via setGlobalVariableTotalSize().
// On failure the memory object is destroyed and the program is left untouched.
void
AllocateGPUMemory(void* userData, size_t size, uint64_t* GPUMemory)
{
    gpu::HSAILProgram* prog = reinterpret_cast<gpu::HSAILProgram*>(userData);

    gpu::Memory* mem = new gpu::Memory(prog->dev(),
        amd::alignUp(size, gpu::ConstBuffer::VectorSize));

    // Create the resource; report failure through a zero address
    if ((mem == NULL) || !mem->create(gpu::Resource::Local)) {
        delete mem;
        *GPUMemory = 0;
        return;
    }

    *GPUMemory = mem->vmAddress();
    prog->setGlobalStore(mem);
    prog->setGlobalVariableTotalSize(size);
}
// Reports the size and alignment of one sampler object. With GPU_DIRECT_SRD
// set the HSA sampler object constants are used; otherwise both values are
// sizeof(uint64_t).
void
GetSamplerObjectParams(uint32_t* size, uint32_t* alignment)
{
    if (!GPU_DIRECT_SRD) {
        *size = sizeof(uint64_t);
        *alignment = sizeof(uint64_t);
        return;
    }

    *size = HSA_SAMPLER_OBJECT_SIZE;
    *alignment = HSA_SAMPLER_OBJECT_ALIGNMENT;
}
void
InitializeSamplerObject(void* userData, uint64_t offset, bool unnormalize,
uint8_t fltr, uint8_t addrU, uint8_t addrV, uint8_t addrW)
{
assert((addrU == addrV && addrV == addrW) && "GSL supports single address mode");
HsaSamplerFilterType filter = static_cast<HsaSamplerFilterType>(fltr);
HsaSamplerAddressMode boundaryU = static_cast<HsaSamplerAddressMode>(addrU);
uint32_t state = (unnormalize) ?
amd::Sampler::StateNormalizedCoordsFalse : amd::Sampler::StateNormalizedCoordsTrue;
if (filter == HSA_SAMP_FILTER_NEAREST) {
state |= amd::Sampler::StateFilterNearest;
}
else if (filter == HSA_SAMP_FILTER_LINEAR) {
state |= amd::Sampler::StateFilterLinear;
}
switch (boundaryU) {
case HSA_SAMP_ADDRESS_CLAMPEDGE:
state |= amd::Sampler::StateAddressClampToEdge;
break;
case HSA_SAMP_ADDRESS_CLAMPBORDER:
state |= amd::Sampler::StateAddressClamp;
break;
case HSA_SAMP_ADDRESS_WRAP:
state |= amd::Sampler::StateAddressRepeat;
break;
case HSA_SAMP_ADDRESS_MIRROR:
state |= amd::Sampler::StateAddressMirroredRepeat;
break;
case HSA_SAMP_ADDRESS_MIRRORONCE:
case HSA_SAMP_ADDRESS_NONE:
default:
break;
}
gpu::HSAILProgram* prog = reinterpret_cast<gpu::HSAILProgram*>(userData);
if (prog->dev().settings().hsailDirectSRD_) {
char *pCPUbuf = new char[HSA_SAMPLER_OBJECT_SIZE];
if (!pCPUbuf) {
assert(false);
return;
}
prog->dev().fillHwSampler(state, pCPUbuf, HSA_SAMPLER_OBJECT_SIZE);
DmaMemoryCopy(userData, offset, pCPUbuf, HSA_SAMPLER_OBJECT_SIZE);
delete pCPUbuf;
}
else {
gpu::Sampler* sampler = new gpu::Sampler(prog->dev());
if ((sampler != NULL) && sampler->create(state)) {
uint64_t hwSrd = sampler->hwSrd();
DmaMemoryCopy(userData, offset, &hwSrd, sizeof(uint64_t));
prog->addSampler(sampler);
}
}
return;
}
namespace gpu {
// Prepares a build: runs the base class initBuild(), passes the per-build
// info (target name, encrypt code) to the options object, initializes the
// CL binary and sets up its output ELF. Returns false if the base class
// step or the ELF setup fails.
bool
NullProgram::initBuild(amd::option::Options* options)
{
    if (!device::Program::initBuild(options)) {
        return false;
    }

    // Fall back to the generic "gpu" name when the device reports no target
    const char* devname = dev().hwInfo()->machineTarget_;
    const bool hasDevName = (devname && (devname[0] != '\0'));
    options->setPerBuildInfo(
        hasDevName ? devname : "gpu",
        clBinary()->getEncryptCode(),
        true // FIXME: the dev ptr is used to query the wavefront size.
        );

    // Elf Binary setup
    // Recompile from IL may happen (invoking Kernel::recompil()) to generate correct
    // isa code for 7xx. Because of this, force saving AMDIL into the binary.
    clBinary()->init(options, (dev().calTarget() <= CAL_TARGET_730));

    // An output file is only requested when the BIF dump flag is set
    std::string outFileName;
    if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
        outFileName = options->getDumpFileName(".bin");
    }
    const char* outFile = (outFileName.size() > 0) ? outFileName.c_str() : NULL;

    bool useELF64 = dev().settings().use64BitPtr_;
    if (clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32, outFile)) {
        return true;
    }

    LogError("Setup elf out for gpu failed");
    return false;
}
// Finishes a build: drops the output and input ELF objects of the CL binary,
// clears the stored binary when the build failed, then defers to the base
// class finiBuild() and returns its result.
bool
NullProgram::finiBuild(bool isBuildGood)
{
    clBinary()->resetElfOut();
    clBinary()->resetElfIn();

    if (isBuildGood) {
        return device::Program::finiBuild(isBuildGood);
    }

    // Prevent the encrypted binary form leaking out
    clBinary()->setBinary(NULL, 0);
    return device::Program::finiBuild(isBuildGood);
}
// Queries the compiler library for the target info of this device and caches
// it in info_. The architecture is GPU_TARGET_INFO_ARCH with "64" appended
// for 64-bit pointer devices. An empty `str` selects the device's own
// targetName_; any other value of `str` is passed through unchanged.
// NOTE(review): a NULL `str` is also passed through as-is - confirm that
// aclGetTargetInfo accepts it.
// A failure is only logged; info_ is returned either way.
const aclTargetInfo &
NullProgram::info(const char * str)
{
    std::string arch(GPU_TARGET_INFO_ARCH);
    if (dev().settings().use64BitPtr_) {
        arch += "64";
    }

    acl_error err;
    const bool useDeviceName = (str && str[0] == '\0');
    info_ = aclGetTargetInfo(arch.c_str(),
        ( useDeviceName ? dev().hwInfo()->targetName_ : str ), &err);

    if (err != ACL_SUCCESS) {
        LogWarning("aclGetTargetInfo failed");
    }
    return info_;
}
// Releases everything the program still holds: first the parsed ILFunc
// objects, then the CL binary.
NullProgram::~NullProgram()
{
    freeAllILFuncs();       // Destroy all ILFunc objects
    releaseClBinary();      // Drop the ClBinary instance, if any
}
bool
NullProgram::isCalled(const ILFunc* base, const ILFunc* func)
{
// Loop through all functions, which will be called from the base one
for (size_t i = 0; i < base->calls_.size(); ++i) {
assert(base->calls_[i] != base && "recursion");
// Check if the current function is the one
if (base->calls_[i] == func) {
return true;
}
// We have to use a recursive method to make sure it's not called inside
else if (isCalled(base->calls_[i], func)) {
return true;
}
}
return false;
}
// Overwrites the text at the patch position (patch_) of `kernel` with
// "call <index>;", where <index> is printed in decimal.
// NOTE(review): for index == 0 no digit is produced and the patch reads
// "call ;" - confirm that function index 0 never reaches this point.
void
NullProgram::patchMain(std::string& kernel, uint index)
{
    // Collect the decimal digits, least significant first, each one placed
    // in front of the digits found so far
    std::string digits;
    for (uint rest = index; rest != 0; rest /= 10) {
        const char digit = (rest % 10) + 0x30;
        digits.insert(digits.begin(), digit);
    }

    const std::string callPatch = "call " + digits + ";";

    // Patch the program
    kernel.replace(patch_, callPatch.size(), callPatch);
}
// Creates a gpu::Kernel named `name` from IL `code`/`metadata` (or from
// `binaryCode`), registers it in the program's kernel map and returns it.
//   created - set to true only when the kernel was created and registered
// On failure the kernel's build error/log are copied into the program, the
// kernel is destroyed and NULL is returned. With EnableBuildTiming the
// elapsed time is appended to the build log.
NullKernel*
Program::createKernel(
    const std::string& name, const Kernel::InitData* initData,
    const std::string& code, const std::string& metadata, bool* created,
    const void* binaryCode, size_t binarySize)
{
    amd::option::Options *options = getCompilerOptions();

    uint64_t start_time = 0;
    if (options->oVariables->EnableBuildTiming) {
        start_time = amd::Os::timeNanos();
    }

    // Pessimistic default, flipped at the very end
    *created = false;

    // Create a GPU kernel
    Kernel* gpuKernel = new Kernel(name,
        static_cast<const gpu::Device&>(device()), *this, initData);
    if (gpuKernel == NULL) {
        buildLog_ += "new Kernel() failed";
        LogPrintfError("new Kernel() failed for kernel %s!", name.c_str());
        return NULL;
    }

    if (!gpuKernel->create(code, metadata, binaryCode, binarySize)) {
        // Keep the diagnostics before the kernel goes away
        buildError_ = gpuKernel->buildError();
        buildLog_ += gpuKernel->buildLog();
        delete gpuKernel;
        LogPrintfError("Kernel creation failed for kernel %s!", name.c_str());
        return NULL;
    }

    // Add kernel to the program
    kernels()[gpuKernel->name()] = gpuKernel;
    buildLog_ += gpuKernel->buildLog();

    if (options->oVariables->EnableBuildTiming) {
        std::stringstream timing;
        timing << " Time for creating kernel ("
               << name << ") : "
               << (amd::Os::timeNanos() - start_time)/1000ULL
               << " us\n";
        buildLog_ += timing.str();
    }

    *created = true;
    return static_cast<NullKernel*>(gpuKernel);
}
// Links a single program: turns whatever input the program holds (OpenCL
// binary, LLVM IR in llvmBinary_, or IL text in ilProgram_) into kernel
// objects and an output OpenCL binary. Steps, in order:
//  1. With neither LLVM IR nor IL available, the input ELF is opened and
//     loadBinary() is tried (skipped under ForceLLVM). When that succeeds the
//     function returns true right away; otherwise the LLVM IR stored in the
//     binary is loaded for recompilation, or the link fails.
//  2. A non-empty llvmBinary_ is compiled to IL by compileBinaryToIL().
//  3. With EnableDebug, the debug IL and the DWARF sections are added to the
//     output ELF (and optionally dumped to a ".debugil" file).
//  4. parseKernels() splits the IL; for every ILFunc in Kernel state a
//     per-kernel IL text is assembled, a kernel is created and stored in the
//     binary. createBinary() then finalizes the output.
// Returns false on any failure (most paths append a message to buildLog_),
// and also when ilProgram_ is empty or parseKernels() fails.
bool
NullProgram::linkImpl(amd::option::Options* options)
{
if (llvmBinary_.empty()) {
// We are using either CL binary or IL directly.
// hasRecompiled is written by loadBinary() and only read when loadSuccess
// is true, i.e. after loadBinary() has actually run.
bool hasRecompiled;
if (ilProgram_.empty()) {
// Setup elfIn() and try to load ISA from binary
// This elfIn() will be released at the end of build by finiBuild().
if (!clBinary()->setElfIn(ELFCLASS32)) {
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
LogError("Setting input OCL binary failed");
return false;
}
bool loadSuccess = false;
if (!options->oVariables->ForceLLVM) {
loadSuccess = loadBinary(&hasRecompiled);
}
// Under -use-debugil (without ForceLLVM) a failed load is fatal: there is
// no fallback to recompilation from LLVM IR.
if (!loadSuccess &&
(options->oVariables->UseDebugIL &&
!options->oVariables->ForceLLVM)) {
buildLog_ += "Internal error: Loading OpenCL binary under -use-debugil failed!\n";
LogError("Loading OCL binary failed under -use-debugil");
return false;
}
if (loadSuccess) {
if (hasRecompiled) {
// Kernels were recompiled while loading, so a new output binary is built;
// SOURCE and LLVM IR/SPIR sections are carried over when saving is enabled.
char *section;
size_t sz;
if (clBinary()->saveSOURCE() &&
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
}
if (clBinary()->saveLLVMIR()) {
if (clBinary()->loadLlvmBinary(llvmBinary_, llvmBinaryIsSpir_) && (!llvmBinary_.empty())) {
clBinary()->elfOut()->addSection(llvmBinaryIsSpir_?amd::OclElf::SPIR:amd::OclElf::LLVMIR,
llvmBinary_.data(), llvmBinary_.size(), false);
}
}
setType(TYPE_EXECUTABLE);
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
buildLog_ += "Internal error: Failed to create OpenCL binary!\n";
LogError("Failed to create OpenCL binary");
return false;
}
}
else {
// The original binary is good and reuse it.
// Release the new binary if there is.
clBinary()->restoreOrigBinary();
}
return true;
}
// Loading the kernels failed (or was skipped): fall back to the LLVM IR kept
// in the binary, provided it can be recompiled for this platform.
else if (clBinary()->loadLlvmBinary(llvmBinary_, llvmBinaryIsSpir_) &&
clBinary()->isRecompilable(llvmBinary_, amd::OclElf::CAL_PLATFORM)) {
char *section;
size_t sz;
// Clean up and remove all the content generated before
if (!clBinary()->clearElfOut()) {
buildLog_ += "Internal error: Resetting OpenCL Binary failed!\n";
LogError("Resetting output OCL binary failed");
return false;
}
if (clBinary()->saveSOURCE() &&
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
}
if (clBinary()->saveLLVMIR()) {
clBinary()->elfOut()->addSection(llvmBinaryIsSpir_?amd::OclElf::SPIR:amd::OclElf::LLVMIR,
llvmBinary_.data(), llvmBinary_.size(), false);
}
}
else {
buildLog_ += "Internal error: Input OpenCL binary is not for the target!\n";
LogError("OCL Binary isn't good for the target");
return false;
}
}
}
// llvmBinary_ may have been filled in by the fallback path above
if (!llvmBinary_.empty()) {
// Compile llvm binary to the IL source code
// This is link/OPT/Codegen part of compiler.
cl_int iErr = compileBinaryToIL(options);
if (iErr != CL_SUCCESS) {
buildLog_ += "Error: Compilation from LLVMIR binary to IL text failed!";
LogError(buildLog_.c_str());
return false;
}
}
if (!ilProgram_.empty() && options->oVariables->EnableDebug) {
// Lets parse out the dwarf debug information and store it in the elf
llvm::CompUnit compilation(ilProgram_);
std::string debugILStr = compilation.getILStr();
const char* dbgSec = debugILStr.c_str();
size_t dbgSize = debugILStr.size();
// Add an IL section that contains debug information and is the
// output of LLVM codegen.
clBinary()->elfOut()->addSection(amd::OclElf::ILDEBUG, dbgSec, dbgSize);
if ((dbgSize > 0) && options->isDumpFlagSet(amd::option::DUMP_DEBUGIL)) {
// Dump the debug IL to a file, every line prefixed with its hexadecimal
// line number.
std::string debugilWithLine;
// NOTE(review): scanning starts at index 1, so the first character of the
// debug IL never reaches the dump - confirm this is intended.
size_t b = 1;
size_t e;
int linenum=0;
// NOTE(review): "%5x: " needs 8 bytes including the terminator for line
// numbers up to 0xFFFFF; a number wider than 6 hex digits would overflow
// the 9-byte buffer.
char cstr[9];
cstr[8] = 0;
while (b != std::string::npos) {
e = debugILStr.find_first_of("\n", b);
if (e != std::string::npos) {
++e;
}
sprintf(&cstr[0], "%5x: ", linenum);
debugilWithLine.append(cstr);
debugilWithLine.append(debugILStr.substr(b,e-b));
b = e;
++linenum;
}
std::string debugilFileName = options->getDumpFileName(".debugil");
std::fstream f;
f.open(debugilFileName.c_str(), (std::fstream::out | std::fstream::binary));
f.write(debugilWithLine.c_str(), debugilWithLine.size());
f.close();
}
// One ELF section per non-empty DWARF bitstream; the section id is the
// DWARF section index offset by amd::OclElf::DEBUG_INFO.
for (unsigned x = 0; x < llvm::AMDILDwarf::DEBUG_LAST; ++x) {
dbgSec = compilation.getDebugData()->getDwarfBitstream(
static_cast<llvm::AMDILDwarf::DwarfSection>(x), dbgSize);
// Do not create an elf section if the size of the section is
// 0.
if (!dbgSize) {
continue;
}
clBinary()->elfOut()->addSection(
static_cast<amd::OclElf::oclElfSections>(x
+ amd::OclElf::DEBUG_INFO), dbgSec, dbgSize);
}
}
// Create kernel objects
if (!ilProgram_.empty() && parseKernels(ilProgram_)) {
// Loop through all possible kernels
for (size_t i = 0; i < funcs_.size(); ++i) {
ILFunc* baseFunc = funcs_[i];
// Make sure we have a Kernel function, but not Intrinsic or Simple
if (baseFunc->state_ == ILFunc::Kernel) {
size_t metadataSize =
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
// Every kernel starts from a private copy of the whole IL program, which
// is then stripped of the functions this kernel does not use.
std::string kernel = ilProgram_;
std::string metadataStr;
std::vector<ILFunc*> notCalled;
std::vector<ILFunc*> called;
std::map<int, const char**> macros;
size_t j;
Kernel::InitData initData = {0};
// Fill the list of not used functions, relativly to the current
for (j = 0; j < funcs_.size(); ++j) {
if ((i != j) &&
((funcs_[j]->state_ == ILFunc::Regular) ||
(funcs_[j]->state_ == ILFunc::Kernel))) {
if (!isCalled(baseFunc, funcs_[j])) {
notCalled.push_back(funcs_[j]);
}
else {
called.push_back(funcs_[j]);
}
}
}
// Get the metadata string for the current kernel
metadataStr.insert(0, kernel,
baseFunc->metadata_.begin_, metadataSize);
// rangeList collects the code and metadata ranges of all unused functions.
// Each function's pair goes in as metadata then code, in front of the first
// entry that begins before its metadata, which appears to keep the list
// ordered by descending begin_; erasing in list order then never shifts a
// range that is still to be erased.
std::vector<ILFunc::SourceRange*> rangeList;
// Remove unused kernels, starting from the end
for (j = notCalled.size(); j > 0; --j) {
ILFunc* func = notCalled[j-1];
std::vector<ILFunc::SourceRange*>::iterator it;
for (it = rangeList.begin(); it != rangeList.end(); ++it) {
if ((*it)->begin_ < func->metadata_.begin_) {
assert((*it)->begin_ < func->code_.begin_
&& "code and metadata not next to each other");
break;
}
assert((*it)->begin_ >= func->code_.begin_
&& "code and metadata not next to each other");
}
assert(func->metadata_.begin_ > func->code_.begin_
&& "code after metadata");
if (it == rangeList.end()) {
rangeList.push_back(&func->metadata_);
rangeList.push_back(&func->code_);
}
else {
it = rangeList.insert(it, &func->code_);
rangeList.insert(it, &func->metadata_);
}
}
for (j = 0; j < rangeList.size(); ++j) {
const ILFunc::SourceRange* range = rangeList[j];
kernel.erase(range->begin_, range->end_ - range->begin_);
}
// Patch the main program with a call to the current kernel
patchMain(kernel, baseFunc->index_);
// Add macros at the top, loop through all available functions
// for this kernel
// The extra iteration (j == called.size()) handles baseFunc itself.
for (j = 0; j <= called.size(); ++j) {
ILFunc* func = (j < called.size()) ? called[j] : baseFunc;
for (size_t l = func->macros_.size(); l > 0 ; --l) {
int lines;
int idx = static_cast<int>(func->macros_[l - 1]);
const char** macro = amd::MacroDBGetMacro(&lines, idx);
// Make sure we didn't place this macro already
if (macros[idx] == NULL) {
macros[idx] = macro;
// Do we have a valid macro?
if ((lines == 0) || (macro == NULL)) {
buildLog_ += "Error: undefined macro!\n";
LogPrintfError(
"Metadata reports undefined macro %d!", idx);
return false;
}
else {
// Add the macro to the IL source
for (int k = 0; k < lines; ++k) {
kernel.insert(0, macro[k], strlen(macro[k]));
}
}
}
}
// Accumulate all emulated local and private sizes,
// necessary for the kernel execution
initData.localSize_ += func->localSize_;
initData.privateSize_ += func->privateSize_;
// Accumulate all HW local and private sizes,
// necessary for the kernel execution
initData.hwLocalSize_ += func->hwLocalSize_;
initData.hwPrivateSize_ += func->hwPrivateSize_;
initData.flags_ |= func->flags_;
}
// Create a GPU kernel
bool created;
NullKernel* gpuKernel = createKernel(baseFunc->name_,
&initData, kernel.data(), metadataStr, &created);
if (!created) {
buildLog_ += "Error: Creating kernel " +
baseFunc->name_ + " failed!\n";
LogError(buildLog_.c_str());
return false;
}
// Add the current kernel to the binary
if (!clBinary()->storeKernel(baseFunc->name_, gpuKernel,
&initData, metadataStr, kernel)) {
buildLog_ += "Internal error: adding a kernel into OpenCL binary failed!\n";
return false;
}
}
else {
// Non-kernel function, save metadata symbols for recompilation
if (clBinary()->saveAMDIL()) {
size_t metadataSize =
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
// metadataSize is unsigned, so this only skips functions without metadata
if (metadataSize <= 0) {
continue;
}
std::string metadataStr;
// Get the metadata string
metadataStr.insert(0, ilProgram_, baseFunc->metadata_.begin_,
metadataSize);
std::stringstream aStream;
aStream << "__OpenCL_" << baseFunc->name_ << "_fmetadata";
std::string metaName = aStream.str();
// Save metadata symbols in .rodata
if (!clBinary()->elfOut()->addSymbol(amd::OclElf::RODATA,
metaName.c_str(),
metadataStr.data(),
metadataStr.size())) {
buildLog_ += "Internal error: addSymbol failed!\n";
LogError ("AddSymbol failed");
return false;
}
}
}
}
setType(TYPE_EXECUTABLE);
// NOTE(review): the message below misspells "Internal"; it is runtime text
// and therefore left untouched here.
if (!createBinary(options)) {
buildLog_ += "Intenral error: creating OpenCL binary failed\n";
return false;
}
// Destroy all ILFunc objects
freeAllILFuncs();
ilProgram_.clear();
return true;
}
return false;
}
// Links several compiled programs into this one. Steps, in order:
//  1. Collect the LLVM IR of every input program; when an input has an empty
//     llvmBinary_ it is loaded from that input's OpenCL binary and checked
//     for recompilability on this platform.
//  2. Wrap each IR blob in an aclBinary, link libs[1..] into libs[0] with
//     aclLink() and extract the resulting LLVM IR into llvmBinary_.
//  3. Optionally store the linked IR plus link/compile options in the output
//     ELF. With createLibrary set, the result is finalized as TYPE_LIBRARY
//     and the function returns.
//  4. Otherwise the IR is compiled to IL, debug sections are added under
//     EnableDebug, and kernels are created and stored exactly as in the
//     single-program linkImpl().
// Returns false on any failure; most paths append a message to buildLog_.
bool
NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
amd::option::Options* options,
bool createLibrary)
{
std::vector<std::string*> llvmBinaries(inputPrograms.size());
std::vector<bool> llvmBinaryIsSpir(inputPrograms.size());
std::vector<device::Program*>::const_iterator it
= inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd
= inputPrograms.end();
for (size_t i = 0; it != itEnd; ++it, ++i) {
// Inputs are treated as NullProgram objects (unchecked C-style downcast)
NullProgram* program = (NullProgram*)*it;
if (program->llvmBinary_.empty()) {
if (program->clBinary() == NULL) {
buildLog_ += "Internal error: Input program not compiled!\n";
LogError("Loading compiled input object failed");
return false;
}
// We are using CL binary directly.
// Setup elfIn() and try to load llvmIR from binary
// This elfIn() will be released at the end of build by finiBuild().
if (!program->clBinary()->setElfIn(ELFCLASS32)) {
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
LogError("Setting input OCL binary failed");
return false;
}
if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_,
program->llvmBinaryIsSpir_)) {
buildLog_
+= "Internal error: Failed loading compiled binary!\n";
LogError("Bad OCL Binary");
return false;
}
if (!program->clBinary()->isRecompilable(program->llvmBinary_,
amd::OclElf::CAL_PLATFORM)) {
buildLog_ += "Internal error: Input OpenCL binary is not"
" for the target!\n";
LogError("OCL Binary isn't good for the target");
return false;
}
#if 0
// TODO: copy .source over to output program
char *section;
size_t sz;
if (clBinary()->saveSOURCE() &&
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
}
#endif
}
// Only pointers are kept; the IR strings stay owned by the input programs
llvmBinaries[i] = &program->llvmBinary_;
llvmBinaryIsSpir[i] = program->llvmBinaryIsSpir_;
}
// NOTE(review): err is not initialized here. With an empty inputPrograms
// list neither loop below assigns it, yet it is still read by the
// "err != ACL_SUCCESS" check after the link - confirm callers never pass an
// empty list.
acl_error err;
aclTargetInfo aclinfo = info();
aclBinaryOptions binOpts = {0};
binOpts.struct_size = sizeof(binOpts);
binOpts.elfclass = aclinfo.arch_id == aclAMDIL64 ? ELFCLASS64 : ELFCLASS32;
binOpts.bitness = ELFDATA2LSB;
binOpts.alloc = &::malloc;
binOpts.dealloc = &::free;
// One aclBinary per input, each holding that input's LLVM IR or SPIR section
std::vector<aclBinary*> libs(llvmBinaries.size(), NULL);
for (size_t i = 0; i < libs.size(); ++i) {
libs[i] = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclBinaryInit failed");
break;
}
err = aclInsertSection(dev().compiler(), libs[i],
llvmBinaries[i]->data(), llvmBinaries[i]->size(),
llvmBinaryIsSpir[i]?aclSPIR:aclLLVMIR);
if (err != ACL_SUCCESS) {
LogWarning("aclInsertSection failed");
break;
}
// temporary solution to synchronize buildNo between runtime and complib
// until we move runtime inside complib
((amd::option::Options*)libs[i]->options)->setBuildNo(
options->getBuildNo());
}
// do { } while(0) is used so that "break" can leave the link sequence early
// while still reaching the cleanup below.
if (libs.size() > 0 && err == ACL_SUCCESS) do {
unsigned int numLibs = libs.size() - 1;
if (numLibs > 0) {
// "-create-library" is passed to aclLink regardless of the createLibrary
// argument of this function.
err = aclLink(dev().compiler(), libs[0], numLibs, &libs[1],
ACL_TYPE_LLVMIR_BINARY, "-create-library", NULL);
buildLog_ += aclGetCompilerLog(dev().compiler());
if (err != ACL_SUCCESS) {
LogWarning("aclLink failed");
break;
}
}
size_t size = 0;
const void* llvmir = aclExtractSection(dev().compiler(), libs[0],
&size, aclLLVMIR, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclExtractSection failed");
break;
}
// The linked result is stored as LLVM IR from here on, never as SPIR
llvmBinary_.assign(reinterpret_cast<const char*>(llvmir), size);
llvmBinaryIsSpir_ = false;
} while(0);
// NOTE(review): after an early break in the init loop the remaining libs
// entries are still NULL and are passed to aclBinaryFini as well - confirm
// it tolerates NULL.
std::for_each(libs.begin(), libs.end(), std::ptr_fun(aclBinaryFini));
if (err != ACL_SUCCESS) {
buildLog_ += "Error: linking llvm modules failed!";
return false;
}
if (clBinary()->saveLLVMIR()) {
clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR,
llvmBinary_.data(), llvmBinary_.size(),
false);
// store the original link options
clBinary()->storeLinkOptions(linkOptions_);
clBinary()->storeCompileOptions(compileOptions_);
}
// skip the rest if we are building an opencl library
if (createLibrary) {
setType(TYPE_LIBRARY);
// NOTE(review): the message below misspells "Internal"; it is runtime text
// and therefore left untouched here.
if (!createBinary(options)) {
buildLog_ += "Intenral error: creating OpenCL binary failed\n";
return false;
}
return true;
}
// Compile llvm binary to the IL source code
// This is link/OPT/Codegen part of compiler.
cl_int iErr = compileBinaryToIL(options);
if (iErr != CL_SUCCESS) {
buildLog_ += "Error: Compilation from LLVMIR binary to IL text failed!";
LogError(buildLog_.c_str());
return false;
}
if (!ilProgram_.empty() && options->oVariables->EnableDebug) {
// Lets parse out the dwarf debug information and store it in the elf
llvm::CompUnit compilation(ilProgram_);
std::string debugILStr = compilation.getILStr();
const char* dbgSec = debugILStr.c_str();
size_t dbgSize = debugILStr.size();
// Add an IL section that contains debug information and is the
// output of LLVM codegen.
clBinary()->elfOut()->addSection(amd::OclElf::ILDEBUG, dbgSec, dbgSize);
if ((dbgSize > 0) && options->isDumpFlagSet(amd::option::DUMP_DEBUGIL)) {
// Dump the debug IL to a file, every line prefixed with its hexadecimal
// line number.
std::string debugilWithLine;
// NOTE(review): scanning starts at index 1, so the first character of the
// debug IL never reaches the dump - confirm this is intended.
size_t b = 1;
size_t e;
int linenum=0;
// NOTE(review): "%5x: " needs 8 bytes including the terminator for line
// numbers up to 0xFFFFF; a number wider than 6 hex digits would overflow
// the 9-byte buffer.
char cstr[9];
cstr[8] = 0;
while (b != std::string::npos) {
e = debugILStr.find_first_of("\n", b);
if (e != std::string::npos) {
++e;
}
sprintf(&cstr[0], "%5x: ", linenum);
debugilWithLine.append(cstr);
debugilWithLine.append(debugILStr.substr(b,e-b));
b = e;
++linenum;
}
std::string debugilFileName = options->getDumpFileName(".debugil");
std::fstream f;
f.open(debugilFileName.c_str(), (std::fstream::out | std::fstream::binary));
f.write(debugilWithLine.c_str(), debugilWithLine.size());
f.close();
}
// One ELF section per non-empty DWARF bitstream; the section id is the
// DWARF section index offset by amd::OclElf::DEBUG_INFO.
for (unsigned x = 0; x < llvm::AMDILDwarf::DEBUG_LAST; ++x) {
dbgSec = compilation.getDebugData()->getDwarfBitstream(
static_cast<llvm::AMDILDwarf::DwarfSection>(x), dbgSize);
// Do not create an elf section if the size of the section is
// 0.
if (!dbgSize) {
continue;
}
clBinary()->elfOut()->addSection(
static_cast<amd::OclElf::oclElfSections>(x
+ amd::OclElf::DEBUG_INFO), dbgSec, dbgSize);
}
}
// Create kernel objects
if (!ilProgram_.empty() && parseKernels(ilProgram_)) {
// Loop through all possible kernels
for (size_t i = 0; i < funcs_.size(); ++i) {
ILFunc* baseFunc = funcs_[i];
// Make sure we have a Kernel function, but not Intrinsic or Simple
if (baseFunc->state_ == ILFunc::Kernel) {
size_t metadataSize =
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
// Every kernel starts from a private copy of the whole IL program, which
// is then stripped of the functions this kernel does not use.
std::string kernel = ilProgram_;
std::string metadataStr;
std::vector<ILFunc*> notCalled;
std::vector<ILFunc*> called;
std::map<int, const char**> macros;
size_t j;
Kernel::InitData initData = {0};
// Fill the list of not used functions, relativly to the current
for (j = 0; j < funcs_.size(); ++j) {
if ((i != j) &&
((funcs_[j]->state_ == ILFunc::Regular) ||
(funcs_[j]->state_ == ILFunc::Kernel))) {
if (!isCalled(baseFunc, funcs_[j])) {
notCalled.push_back(funcs_[j]);
}
else {
called.push_back(funcs_[j]);
}
}
}
// Get the metadata string for the current kernel
metadataStr.insert(0, kernel,
baseFunc->metadata_.begin_, metadataSize);
// rangeList collects the code and metadata ranges of all unused functions.
// Each function's pair goes in as metadata then code, in front of the first
// entry that begins before its metadata, which appears to keep the list
// ordered by descending begin_; erasing in list order then never shifts a
// range that is still to be erased.
std::vector<ILFunc::SourceRange*> rangeList;
// Remove unused kernels, starting from the end
for (j = notCalled.size(); j > 0; --j) {
ILFunc* func = notCalled[j-1];
std::vector<ILFunc::SourceRange*>::iterator it;
for (it = rangeList.begin(); it != rangeList.end(); ++it) {
if ((*it)->begin_ < func->metadata_.begin_) {
assert((*it)->begin_ < func->code_.begin_
&& "code and metadata not next to each other");
break;
}
assert((*it)->begin_ >= func->code_.begin_
&& "code and metadata not next to each other");
}
assert(func->metadata_.begin_ > func->code_.begin_
&& "code after metadata");
if (it == rangeList.end()) {
rangeList.push_back(&func->metadata_);
rangeList.push_back(&func->code_);
}
else {
it = rangeList.insert(it, &func->code_);
rangeList.insert(it, &func->metadata_);
}
}
for (j = 0; j < rangeList.size(); ++j) {
const ILFunc::SourceRange* range = rangeList[j];
kernel.erase(range->begin_, range->end_ - range->begin_);
}
// Patch the main program with a call to the current kernel
patchMain(kernel, baseFunc->index_);
// Add macros at the top, loop through all available functions
// for this kernel
// The extra iteration (j == called.size()) handles baseFunc itself.
for (j = 0; j <= called.size(); ++j) {
ILFunc* func = (j < called.size()) ? called[j] : baseFunc;
for (size_t l = func->macros_.size(); l > 0 ; --l) {
int lines;
int idx = static_cast<int>(func->macros_[l - 1]);
const char** macro = amd::MacroDBGetMacro(&lines, idx);
// Make sure we didn't place this macro already
if (macros[idx] == NULL) {
macros[idx] = macro;
// Do we have a valid macro?
if ((lines == 0) || (macro == NULL)) {
buildLog_ += "Error: undefined macro!\n";
LogPrintfError(
"Metadata reports undefined macro %d!", idx);
return false;
}
else {
// Add the macro to the IL source
for (int k = 0; k < lines; ++k) {
kernel.insert(0, macro[k], strlen(macro[k]));
}
}
}
}
// Accumulate all emulated local and private sizes,
// necessary for the kernel execution
initData.localSize_ += func->localSize_;
initData.privateSize_ += func->privateSize_;
// Accumulate all HW local and private sizes,
// necessary for the kernel execution
initData.hwLocalSize_ += func->hwLocalSize_;
initData.hwPrivateSize_ += func->hwPrivateSize_;
initData.flags_ |= func->flags_;
}
// Create a GPU kernel
bool created;
NullKernel* gpuKernel = createKernel(baseFunc->name_,
&initData, kernel.data(), metadataStr, &created);
if (!created) {
buildLog_ += "Error: Creating kernel " +
baseFunc->name_ + " failed!\n";
LogError(buildLog_.c_str());
return false;
}
// Add the current kernel to the binary
if (!clBinary()->storeKernel(baseFunc->name_, gpuKernel,
&initData, metadataStr, kernel)) {
buildLog_ += "Internal error: adding a kernel into OpenCL binary failed!\n";
return false;
}
}
else {
// Non-kernel function, save metadata symbols for recompilation
if (clBinary()->saveAMDIL()) {
size_t metadataSize =
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
// metadataSize is unsigned, so this only skips functions without metadata
if (metadataSize <= 0) {
continue;
}
std::string metadataStr;
// Get the metadata string
metadataStr.insert(0, ilProgram_, baseFunc->metadata_.begin_,
metadataSize);
std::stringstream aStream;
aStream << "__OpenCL_" << baseFunc->name_ << "_fmetadata";
std::string metaName = aStream.str();
// Save metadata symbols in .rodata
if (!clBinary()->elfOut()->addSymbol(amd::OclElf::RODATA,
metaName.c_str(),
metadataStr.data(),
metadataStr.size())) {
buildLog_ += "Internal error: addSymbol failed!\n";
LogError ("AddSymbol failed");
return false;
}
}
}
}
setType(TYPE_EXECUTABLE);
if (!createBinary(options)) {
buildLog_ += "Intenral error: creating OpenCL binary failed\n";
return false;
}
// Destroy all ILFunc objects
freeAllILFuncs();
ilProgram_.clear();
return true;
}
return false;
}
// Makes sure clBinary_ exists, creating a ClBinary for this program's device
// on first use. Returns false only when the object could not be allocated.
bool
NullProgram::initClBinary()
{
    if (clBinary_ != NULL) {
        // Already set up by an earlier call
        return true;
    }

    clBinary_ = new ClBinary(static_cast<const Device&>(device()));
    return (clBinary_ != NULL);
}
// Destroys the ClBinary object, if there is one, and resets the pointer so
// that a second call does nothing.
void
NullProgram::releaseClBinary()
{
    if (clBinary_ == NULL) {
        return;
    }

    delete clBinary_;
    clBinary_ = NULL;
}
// Loads the kernels stored in the CL binary into this program.
//   hasRecompiled - forwarded to ClBinary::loadKernels()
// On failure the program is reset with clear() and false is returned.
bool
NullProgram::loadBinary(bool* hasRecompiled)
{
    if (clBinary()->loadKernels(*this, hasRecompiled)) {
        return true;
    }

    // Do not keep a partially loaded state around
    clear();
    return false;
}
// Parses the global data stores embedded in the IL text and uploads them.
//   source - IL text; the parsed ";#DATASTART ... DATAEND" region is erased
//            from it (the constness of the reference is cast away for that)
//   start  - position from which the first ";#DATASTART" is searched
// A store header is either "DATASTART:<index>:<size>" or, for the emulated
// global data store, "DATASTART:<size>" (index 0). The body is a list of
// typed entries "<tag>:<offset>:<count>:<hex values...>" up to "DATAEND".
// Every store is staged in a temporary buffer, handed to allocGlobalData()
// and recorded in the CL binary with storeGlobalData().
// Returns true when no store is present or all stores were processed, false
// on any parse or allocation failure.
// NOTE(review): entry offsets and element counts are not validated against
// dataSize, so malformed metadata can write past the staging buffer.
bool
NullProgram::initGlobalData(const std::string& source, size_t start)
{
    size_t pos, dataStart;

    // Find the global data store
    dataStart = source.find(";#DATASTART", start);
    if (dataStart != std::string::npos) {
        uint index = 0;
        // Skip the leading ";#"
        pos = dataStart + 2;
        while (expect(source, &pos, "DATASTART:")) {
            uint dataSize = 0;
            uint offset;
            uint numElements;
            size_t posStart;
            bool failed = false;

            // Kernel has the global constants
            if (!getuint(source, &pos, &index)) {
                return false;
            }
            pos--;
            if (expect(source, &pos, ":")) {
                // Read the size
                if (!getuint(source, &pos, &dataSize)) {
                    return false;
                }
            }
            else {
                // Emulated global data store
                pos++;
                dataSize = index;
                index = 0;
            }

            if (dataSize == 0) {
                return false;
            }

            posStart = pos = source.find_first_not_of(";# \n\r", pos);

            // Zero-filled, so bytes that no entry describes have a defined
            // value when the buffer is uploaded and stored in the binary
            char* globalData = new char[dataSize]();
            if (globalData == NULL) {
                return false;
            }

            // Find the global data size
            while (!expect(source, &pos, "DATAEND")) {
                for (uint i = 0; i < DataTypeTotal; ++i) {
                    if (expect(source, &pos, DataType[i].tagName_)) {
                        // Read the offset
                        if (!getuint(source, &pos, &offset)) {
                            // Do not leak the staging buffer on a parse error
                            delete [] globalData;
                            return false;
                        }
                        // Read the number of elements
                        if (!getuint(source, &pos, &numElements)) {
                            delete [] globalData;
                            return false;
                        }
                        for (uint j = 0; j < numElements; ++j) {
                            switch (DataType[i].type_) {
                            case KernelArg::Float: {
                                uint32_t* tmp = reinterpret_cast<uint32_t*>(globalData + offset);
                                if (!getuintHex(source, &pos, &tmp[j])) {
                                    failed = true;
                                }
                            }
                                break;
                            case KernelArg::Double: {
                                uint64_t* tmp = reinterpret_cast<uint64_t*>(globalData + offset);
                                if (!getuint64Hex(source, &pos, &tmp[j])) {
                                    failed = true;
                                }
                            }
                                break;
                            case KernelArg::Struct:
                            case KernelArg::Union:
                                // Struct and Union should be presented as bytes
                                // Fall through...
                            case KernelArg::Char: {
                                uint8_t* tmp = reinterpret_cast<uint8_t*>(globalData + offset);
                                // Initialized so a failed read stores a defined value
                                uint value = 0;
                                if (!getuintHex(source, &pos, &value)) {
                                    failed = true;
                                }
                                tmp[j] = static_cast<uint8_t>(value);
                            }
                                break;
                            case KernelArg::Short: {
                                uint16_t* tmp = reinterpret_cast<uint16_t*>(globalData + offset);
                                uint value = 0;
                                if (!getuintHex(source, &pos, &value)) {
                                    failed = true;
                                }
                                tmp[j] = static_cast<uint16_t>(value);
                            }
                                break;
                            case KernelArg::Int:
                            case KernelArg::UInt: {
                                uint32_t* tmp = reinterpret_cast<uint32_t*>(globalData + offset);
                                if (!getuintHex(source, &pos, &tmp[j])) {
                                    failed = true;
                                }
                            }
                                break;
                            case KernelArg::Long:
                            case KernelArg::ULong: {
                                uint64_t* tmp = reinterpret_cast<uint64_t*>(globalData + offset);
                                if (!getuint64Hex(source, &pos, &tmp[j])) {
                                    failed = true;
                                }
                            }
                                break;
                            case KernelArg::None:
                            default:
                                break;
                            }
                            if (failed) {
                                delete [] globalData;
                                return false;
                            }
                        }
                        break;
                    }
                }
                // No tag matched and nothing was consumed: give up instead
                // of looping forever
                if (posStart == pos) {
                    delete [] globalData;
                    return false;
                }
                posStart = pos = source.find_first_not_of(";# \n\r", pos);
            }

            if (!allocGlobalData(globalData, dataSize, index)) {
                failed = true;
            }
            if (!clBinary()->storeGlobalData(globalData, dataSize, index)) {
                failed = true;
            }
            delete [] globalData;

            // Erase the global store information
            if (index != 0) {
                if (expect(source, &pos, ":")) {
                    // Read the size
                    if (!getuint(source, &pos, &index)) {
                        return false;
                    }
                }
            }
            pos = source.find_first_not_of(";# \n\r", pos);
            (const_cast<std::string&>(source)).erase(dataStart, pos - dataStart);
            // Whatever followed the erased store now sits at dataStart
            pos = dataStart;
            if (failed) {
                return false;
            }
        }
    }
    return true;
}
bool
NullProgram::findILFuncs(const std::string& source,
const std::string &func_start,
const std::string &func_end,
size_t& lastFuncPos)
{
lastFuncPos = 0;
// Find first tag
size_t pos = source.find(func_start);
// Loop through all provided program arguments
while (pos != std::string::npos) {
char funcName[256];
ILFunc func;
func.code_.begin_ = pos;
if (!expect(source, &pos, func_start)) {
break;
}
pos = source.find_first_not_of(" \n\r", pos);
// Read the function index
if (!getuint(source, &pos, &func.index_)) {
LogError("Error reading function index");
return false;
}
pos = source.find_first_of(";\n\r", pos);
if (source[pos] == '\r' || source[pos] == '\n') {
// this is the dummy macro
func.name_ = std::string("");
}
else {
pos = source.find_first_not_of("; \n\r", pos);
// Read the function's name
if (!getword(source, &pos, funcName)) {
LogError("Error reading function name");
return false;
}
func.name_ = funcName;
}
// Find the function end
pos = source.find(func_end, pos);
if (!expect(source, &pos, func_end)) {
break;
}
if (source[pos] == '\r' || source[pos] == '\n') {
if (!func.name_.empty()) {
LogError("Missing function name");
return false;
}
}
else {
// this is the dummy macro
pos = source.find_first_not_of("; \n\r", pos);
if (!expect(source, &pos, funcName)) {
LogError("Error reading function name");
return false;
}
}
// Save the function end
func.code_.end_ = pos;
if (!func.name_.empty()) {
// Create a new function
ILFunc* clFunc = new ILFunc(func);
if (clFunc != NULL) {
addFunc(clFunc);
}
else {
return false;
}
}
lastFuncPos = pos;
// Next function
pos = source.find(func_start, pos);
}
return true;
}
// Collects both kinds of IL functions from `source`: the ones written as
// "func " ... "endfunc " and the outlined macros "mdef(" ... "mend".
//   lastFuncPos - on success, the larger of the two end positions reported
//                 by findILFuncs(); untouched on failure
bool
NullProgram::findAllILFuncs(const std::string& source, size_t& lastFuncPos)
{
    // find all functions defined using "func"
    size_t funcEnd;
    if (!findILFuncs(source, "func ", "endfunc ", funcEnd)) {
        return false;
    }

    // find all functions defined using outlined macro
    size_t macroEnd;
    if (!findILFuncs(source, "mdef(", "mend", macroEnd)) {
        return false;
    }

    lastFuncPos = std::max(funcEnd, macroEnd);
    return true;
}
// First parsing pass over the IL text: remembers the patch position (unless
// the source language option is "il"), collects all IL functions and then
// initializes the global data that follows the last function.
// Returns false when the patch marker is required but missing, or when
// either of the two later steps fails.
bool
NullProgram::parseAllILFuncs(const std::string& source)
{
    amd::option::Options *opts = getCompilerOptions();

    // Plain IL input carries no patch marker
    const bool doPatch = !opts->isCStrEqual(opts->oVariables->XLang, "il");

    // Find the patch position
    if (doPatch) {
        patch_ = source.find(";$$$$$$$$$$");
        if (patch_ == std::string::npos) {
            return false;
        }
    }

    size_t lastFuncPos = 0;
    if (!findAllILFuncs(source, lastFuncPos)) {
        return false;
    }

    // Initialize the global data if available
    if (initGlobalData(source, lastFuncPos)) {
        return true;
    }

    LogError("We failed the global constants detection/initialization!");
    return false;
}
//! Parses the metadata records found in [posBegin, posEnd) of \a source.
//!
//! Each record starts with ';' followed by one of the tags listed in
//! ArgState. Flag, printf, size and call/macro records are applied to the
//! function selected by the most recent function-id record (a record whose
//! ArgState entry has size_ == 0 and that is not handled by an earlier branch).
//!
//! \param source    program text that holds the metadata
//! \param posBegin  offset where parsing starts
//! \param posEnd    offset where parsing stops
//! \return false on an error-message record, a malformed record, an unknown
//!         function/macro index, or a record that needs a function before
//!         any function-id record was parsed; true otherwise
bool
NullProgram::parseFuncMetadata(const std::string& source, size_t posBegin, size_t posEnd)
{
    ILFunc* baseFunc = NULL;
    uint    index;
    size_t  pos = posBegin;

    while (pos < posEnd) {
        if (!expect(source, &pos, ";")) {
            break;
        }

        for (uint k = 0; k < DescTotal; ++k) {
            uint funcIndex;

            if (expect(source, &pos, ArgState[k].typeName_)) {
                if (ArgState[k].type_ == KernelArg::ErrorMessage) {
                    // The message text runs up to the next ';'
                    size_t posNext = source.find(";", pos);
                    buildLog_.append("Error:");
                    buildLog_.append(source.substr(pos, posNext - pos));
                    return false;
                }
                else if (ArgState[k].type_ == KernelArg::WarningMessage) {
                    // The message text runs up to the next ';'
                    size_t posNext = source.find(";", pos);
                    buildLog_.append("Warning:");
                    buildLog_.append(source.substr(pos, posNext - pos));
                    continue;
                }
                else if (ArgState[k].type_ == KernelArg::MetadataVersion) {
                    continue;
                }

                // The records below modify the current function, which stays
                // NULL until a function-id record has been parsed
                const bool flagOrPrintf =
                    (ArgState[k].type_ == KernelArg::PrivateFixed) ||
                    (ArgState[k].type_ == KernelArg::ABI64Bit) ||
                    (ArgState[k].type_ == KernelArg::Wavefront) ||
                    (ArgState[k].type_ == KernelArg::PrintfFormatStr);
                if (flagOrPrintf && (baseFunc == NULL)) {
                    LogError("Metadata record precedes the function id!");
                    return false;
                }

                if (ArgState[k].type_ == KernelArg::PrivateFixed) {
                    baseFunc->flags_ |= Kernel::PrivateFixed;
                    continue;
                }
                else if (ArgState[k].type_ == KernelArg::ABI64Bit) {
                    baseFunc->flags_ |= Kernel::ABI64bit;
                    continue;
                }
                else if (ArgState[k].type_ == KernelArg::Wavefront) {
                    baseFunc->flags_ |= Kernel::LimitWorkgroup;
                    continue;
                }
                else if (ArgState[k].type_ == KernelArg::PrintfFormatStr) {
                    uint tmp;
                    uint arguments;
                    PrintfInfo info;

                    // Read index
                    if (!getuint(source, &pos, &index)) {
                        return false;
                    }
                    if (printf_.size() <= index) {
                        printf_.resize(index + 1);
                    }
                    // Read the number of arguments
                    if (!getuint(source, &pos, &arguments)) {
                        return false;
                    }
                    for (uint a = 0; a < arguments; ++a) {
                        // Read the argument's size in bytes
                        if (!getuint(source, &pos, &tmp)) {
                            return false;
                        }
                        info.arguments_.push_back(tmp);
                    }
                    // Read length
                    if (!getuint(source, &pos, &tmp)) {
                        return false;
                    }
                    // Read string (uses length so all possible chars are valid)
                    for (size_t i = 0; i < tmp; ++i) {
                        // The declared length must not run past the source
                        if (pos >= source.size()) {
                            LogError("Truncated printf format string!");
                            return false;
                        }
                        char symbol = source[pos++];
                        if (symbol == '\\') {
                            // Only "\n" and "\r" are translated; any other
                            // backslash is stored as is
                            switch (source[pos]) {
                            case 'n':
                                pos++;
                                symbol = '\n';
                                break;
                            case 'r':
                                pos++;
                                symbol = '\r';
                                break;
                            default:
                                break;
                            }
                        }
                        info.fmtString_.push_back(symbol);
                    }
                    if (!expect(source, &pos, ";")) {
                        return false;
                    }
                    printf_[index] = info;
                    baseFunc->flags_ |= Kernel::PrintfOutput;
                    // Process next token ...
                    continue;
                }

                // Read the index
                if (!getuint(source, &pos, &index)) {
                    return false;
                }

                const bool sizeRecord =
                    (ArgState[k].type_ == KernelArg::PrivateSize) ||
                    (ArgState[k].type_ == KernelArg::LocalSize) ||
                    (ArgState[k].type_ == KernelArg::HwPrivateSize) ||
                    (ArgState[k].type_ == KernelArg::HwLocalSize);
                if (sizeRecord) {
                    if (baseFunc == NULL) {
                        LogError("Metadata record precedes the function id!");
                        return false;
                    }
                    switch (ArgState[k].type_) {
                    case KernelArg::PrivateSize:
                        baseFunc->privateSize_ = index;
                        break;
                    case KernelArg::LocalSize:
                        baseFunc->localSize_ = index;
                        break;
                    case KernelArg::HwPrivateSize:
                        baseFunc->hwPrivateSize_ = index;
                        break;
                    case KernelArg::HwLocalSize:
                        baseFunc->hwLocalSize_ = index;
                        break;
                    default:
                        break;
                    }
                    continue;
                }

                if (!ArgState[k].size_) {
                    // Find the base function
                    baseFunc = findILFunc(index);
                    if (baseFunc == NULL) {
                        return false;
                    }
                    // Sanity check
                    if (baseFunc->state_ != ILFunc::Unknown) {
                        buildLog_ = "Error: Creating kernel ";
                        buildLog_ += baseFunc->name_;
                        buildLog_ += " failed!\n";
                        LogError(buildLog_.c_str());
                        continue;
                    }
                    // If we have __OpenCL_ prefix in the name
                    // and _kernel suffix, then this is a kernel function
                    const std::string prefix = "__OpenCL_";
                    const std::string postfix = "_kernel";
                    const std::string &fname = baseFunc->name_;
                    size_t namelen = fname.size();
                    // Only evaluated by compare() after the prefix matched,
                    // i.e. when the name is longer than the postfix
                    size_t postfixPos = namelen - postfix.size();
                    if (fname.compare(0, prefix.size(), prefix) == 0 &&
                        fname.compare(postfixPos, namelen, postfix) == 0) {
                        baseFunc->state_ = ILFunc::Kernel;
                        baseFunc->name_.erase(postfixPos, postfix.size());
                        baseFunc->name_.erase(0, prefix.size());
                    }
                    else {
                        baseFunc->state_ = ILFunc::Regular;
                    }
                    baseFunc->metadata_.begin_ = posBegin;
                    baseFunc->metadata_.end_ = posEnd;
                    continue;
                }

                // The entries below are attached to the current function
                if ((index != 0) && (baseFunc == NULL)) {
                    LogError("Metadata record precedes the function id!");
                    return false;
                }

                // Process metadata: 'index' holds the number of entries
                for (uint j = 0; j < index; ++j) {
                    // Read the index
                    if (getuint(source, &pos, &funcIndex)) {
                        bool error = false;
                        if (ArgState[k].name_) {
                            ILFunc* func = findILFunc(funcIndex);
                            if (NULL != func) {
                                baseFunc->calls_.push_back(func);
                            }
                            else {
                                buildLog_ += "Error: Undeclared function index ";
                                error = true;
                            }
                        }
                        else {
                            if (funcIndex != 0xffffffff) {
                                baseFunc->macros_.push_back(funcIndex);
                            }
                            else {
                                buildLog_ += "Error: Undeclared macro index ";
                                error = true;
                            }
                        }
                        if (error) {
                            char str[8];
                            intToStr(funcIndex, str, 8);
                            buildLog_ += str;
                            buildLog_ += "\n";
                            LogError("Undeclared index!");
                            return false;
                        }
                    }
                    else {
                        return false;
                    }
                }
            }
        }
        // Next argument
        pos = source.find(";", pos);
    }
    return true;
}
//! Removes the debug sections from \a source, collects all IL functions and
//! parses the ";ARGSTART:" ... ";ARGEND:" metadata blocks.
//!
//! \param source  IL program text; NOTE(review): it is modified in place
//!                through const_cast, so callers must not pass an object that
//!                is really const
//! \return false if the function scan, a function name read or a metadata
//!         block fails to parse; true otherwise
bool
NullProgram::parseKernels(const std::string& source)
{
    std::string& editable = const_cast<std::string&>(source);
    size_t pos = 0;

    // Strip out all the debug tokens as these are
    // not needed yet, but will be used later.
    while (true) {
        pos = editable.find(";DEBUGSTART", pos);
        if (pos == std::string::npos) {
            break;
        }
        const size_t last = editable.find(";DEBUGEND", pos);
        if (last == std::string::npos) {
            // Unterminated debug section: drop the rest of the text instead
            // of computing a wrapped-around length
            editable.erase(pos);
            break;
        }
        // 10 = the 9 characters of ";DEBUGEND" plus the single character
        // that follows the token
        editable.erase(pos, last - pos + 10);
        // 'pos' is kept: the text that followed the section now starts there,
        // so an adjacent debug section is found by the next search
    }

    // Create a list of all functions in the program
    if (!parseAllILFuncs(source)) {
        return false;
    }

    pos = 0;
    // Find all available metadata structures
    for (size_t i = 0; i < funcs_.size(); ++i) {
        char funcName[256];
        ILFunc::SourceRange range;

        // Find function metadata start
        range.begin_ = pos = source.find(";ARGSTART:", pos);
        if (pos == std::string::npos) {
            break;
        }
        // Find function metadata end
        pos = source.find(";ARGEND:", pos);
        if (!expect(source, &pos, ";ARGEND:")) {
            break;
        }
        // Read the function's name
        if (!getword(source, &pos, funcName)) {
            return false;
        }
        pos = source.find_first_not_of(" \n\r", pos);
        range.end_ = pos;
        if (!parseFuncMetadata(source, range.begin_, range.end_)) {
            return false;
        }
    }
    return true;
}
//! Deletes every ILFunc object held in funcs_ and empties the list
void NullProgram::freeAllILFuncs()
{
    const size_t total = funcs_.size();
    size_t slot = 0;
    while (slot != total) {
        delete funcs_[slot];
        ++slot;
    }
    funcs_.clear();
}
//! Looks up the IL function whose index_ equals \a index.
//!
//! \return the first matching entry of funcs_, or NULL if there is none
ILFunc*
NullProgram::findILFunc(uint index)
{
    ILFunc* match = NULL;
    for (size_t slot = 0; (match == NULL) && (slot < funcs_.size()); ++slot) {
        if (index == funcs_[slot]->index_) {
            match = funcs_[slot];
        }
    }
    return match;
}
//! Creates a NullKernel from IL code and metadata and registers it with the
//! program under the kernel's own name.
//!
//! \param name        kernel name
//! \param initData    not used by this implementation
//! \param code        kernel code passed on to NullKernel::create()
//! \param metadata    kernel metadata passed on to NullKernel::create()
//! \param created     set to true only when the kernel was created
//! \param binaryCode  binary passed on to NullKernel::create()
//! \param binarySize  size of \a binaryCode
//! \return the new kernel, or NULL on failure (the build log and, for a
//!         failed create(), the build error are updated)
NullKernel*
NullProgram::createKernel(
    const std::string& name, const Kernel::InitData* initData,
    const std::string& code, const std::string& metadata, bool* created,
    const void* binaryCode, size_t binarySize)
{
    amd::option::Options* const opts = getCompilerOptions();
    const bool timeBuild = opts->oVariables->EnableBuildTiming;

    uint64_t start_time = 0;
    if (timeBuild) {
        start_time = amd::Os::timeNanos();
    }

    *created = false;

    // Create a GPU kernel
    NullKernel* kernel = new NullKernel(name,
        static_cast<const gpu::NullDevice&>(device()), *this);
    if (kernel == NULL) {
        buildLog_ += "new Kernel() failed";
        LogPrintfError("new Kernel() failed for kernel %s!",
            name.c_str());
        return NULL;
    }

    if (!kernel->create(code, metadata, binaryCode, binarySize)) {
        // Keep the diagnostics before the kernel goes away
        buildError_ = kernel->buildError();
        buildLog_ += kernel->buildLog();
        delete kernel;
        LogPrintfError("Kernel creation failed for kernel %s!", name.c_str());
        return NULL;
    }

    // Add kernel to the program
    kernels()[kernel->name()] = kernel;
    buildLog_ += kernel->buildLog();

    if (timeBuild) {
        std::stringstream timing;
        timing << "    Time for creating kernel ("
               << name << ") : "
               << (amd::Os::timeNanos() - start_time)/1000ULL
               << " us\n";
        buildLog_ += timing.str();
    }

    *created = true;
    return kernel;
}
// Invoked from ClBinary
//! Copies kernel IL strings from \a programIL into \a allKernelILs, keyed by
//! the kernel name reported by the compilation unit.
//!
//! \param allKernelILs  receives the name -> IL entries
//! \param programIL     IL of the whole program
//! \param ilKernelName  when not NULL, only the first kernel named
//!                      "__OpenCL_<ilKernelName>_kernel" is copied;
//!                      when NULL, every kernel is copied
//! \return always true
bool
NullProgram::getAllKernelILs(std::map<std::string, std::string>& allKernelILs,
    std::string& programIL, const char* ilKernelName)
{
    llvm::CompUnit compunit(programIL);
    const int kernelCount = static_cast<int>(compunit.getNumKernels());

    if (ilKernelName == NULL) {
        for (int k = 0; k < kernelCount; ++k) {
            std::string current = compunit.getKernelName(k);
            allKernelILs[current] = compunit.getKernelStr(k);
        }
        return true;
    }

    std::string wanted("__OpenCL_");
    wanted += ilKernelName;
    wanted += "_kernel";

    for (int k = 0; k < kernelCount; ++k) {
        std::string current = compunit.getKernelName(k);
        if (current == wanted) {
            allKernelILs[current] = compunit.getKernelStr(k);
            break;
        }
    }
    return true;
}
//! Creates the ELF binary image unless the BinBIF30 option is set.
//!
//! \param options  build options (BinBIF30 and BinEncrypt are read)
//! \return false only if the ELF binary creation fails
bool
NullProgram::createBinary(amd::option::Options* options)
{
    if (!options->oVariables->BinBIF30) {
        const bool elfCreated = clBinary()->createElfBinary(
            options->oVariables->BinEncrypt, type());
        if (!elfCreated) {
            LogError("Failed to create ELF binary image!");
            return false;
        }
    }
    return true;
}
//! Releases the global HW constant buffers and the global data store
Program::~Program()
{
    // Destroy the global HW constant buffers
    const Program::HwConstBuffers& hwBuffers = glbHwCb();
    Program::HwConstBuffers::const_iterator cur = hwBuffers.begin();
    const Program::HwConstBuffers::const_iterator last = hwBuffers.end();
    while (cur != last) {
        delete cur->second;
        ++cur;
    }

    // Destroy the global data store (deleting a null pointer is a no-op)
    delete glbData_;
}
//! Allocates GPU memory for a global data block and uploads the data.
//!
//! \param globalData  host pointer to the data to upload
//! \param dataSize    size of the data in bytes
//! \param index       0 selects the global data store (scratch buffer); any
//!                    other value creates a constant buffer registered under
//!                    that index
//! \return true if the memory was allocated and the upload succeeded
bool
Program::allocGlobalData(const void* globalData, size_t dataSize, uint index)
{
    bool result = false;
    gpu::Memory* dataStore = NULL;

    if (index == 0) {
        // We have to lock the heap block allocation,
        // so possible reallocation won't occur twice or
        // another thread could destroy a heap block,
        // while we didn't finish allocation
        amd::ScopedLock k(dev().lockAsyncOps());

        // Allocate memory for the global data store
        glbData_ = dev().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
        dataStore = glbData_;
    }
    else {
        dataStore = new Memory(dev(), amd::alignUp(dataSize, ConstBuffer::VectorSize));

        // Initialize constant buffer
        if ((dataStore == NULL) || !dataStore->create(Resource::RemoteUSWC)) {
            delete dataStore;
            // Must be cleared, otherwise the upload below would write
            // through a pointer to the object that was just destroyed
            dataStore = NULL;
        }
        else {
            constBufs_[index] = dataStore;
            glbCb_.push_back(index);
        }
    }

    if (dataStore != NULL) {
        // Upload data to GPU memory
        static const bool Entire = true;
        amd::Coord3D origin(0, 0, 0);
        amd::Coord3D region(dataSize);
        result = dev().xferMgr().writeBuffer(globalData,
            *dataStore, origin, region, Entire);
    }

    return result;
}
//! Loads the kernels and then the global data from the program binary.
//!
//! \param hasRecompile  passed on to the binary's loadKernels()
//! \return true if both steps succeed; otherwise the program is cleared
//!         and false is returned
bool
Program::loadBinary(bool* hasRecompile)
{
    // The global data is only loaded once the kernels were loaded
    const bool loaded = clBinary()->loadKernels(*this, hasRecompile) &&
                        clBinary()->loadGlobalData(*this);

    if (!loaded) {
        // Make sure that kernels that have been generated so far shall be deleted.
        clear();
    }
    return loaded;
}
//! Constructs an empty HSAIL program for \a device and prepares the binary
//! options block (binOpts_)
HSAILProgram::HSAILProgram(Device& device)
    : Program(device)
    , llvmBinary_()
    , binaryElf_(NULL)
    , rawBinary_(NULL)
    , globalStore_(NULL)
    , kernels_(NULL)
    , maxScratchRegs_(0)
{
    // Start from an all-zero block, then fill in the fields set here
    memset(&binOpts_, 0, sizeof(binOpts_));

    // Memory management callbacks
    binOpts_.alloc       = &::malloc;
    binOpts_.dealloc     = &::free;

    // Layout description
    binOpts_.struct_size = sizeof(binOpts_);
    binOpts_.bitness     = ELFDATA2LSB;
    binOpts_.elfclass    = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
}
//! Releases the static samplers, the raw binary, the ACL binary, the global
//! store and the kernel table
HSAILProgram::~HSAILProgram()
{
    // Destroy internal static samplers
    auto sampler = staticSamplers_.begin();
    while (sampler != staticSamplers_.end()) {
        delete *sampler;
        ++sampler;
    }

    if (rawBinary_ != NULL) {
        free(rawBinary_);
    }

    // Free the elf binary
    if (binaryElf_ != NULL) {
        const acl_error status = aclBinaryFini(binaryElf_);
        if (ACL_SUCCESS != status) {
            LogWarning( "Error while destroying the acl binary \n" );
        }
    }

    delete globalStore_;
    delete kernels_;
}
//! Prepares the program for a build: runs the base class initialization,
//! stores the per-build information and sets up the ELF output.
//!
//! \param options  build options
//! \return false if the base initialization or the ELF output setup fails
bool
HSAILProgram::initBuild(amd::option::Options *options)
{
    if (!device::Program::initBuild(options)) {
        return false;
    }

    // Fall back to "gpu" when the device reports no machine target name
    const char* target = dev().hwInfo()->machineTarget_;
    const bool hasTarget = (target != NULL) && (*target != '\0');
    options->setPerBuildInfo(hasTarget ? target : "gpu",
        clBinary()->getEncryptCode(), true);

    // Elf Binary setup
    // true means fsail required
    clBinary()->init(options, true);

    std::string dumpFile;
    if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
        dumpFile = options->getDumpFileName(".bin");
    }
    const char* elfOutName = dumpFile.empty() ? NULL : dumpFile.c_str();

    if (clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), elfOutName)) {
        return true;
    }

    LogError("Setup elf out for gpu failed");
    return false;
}
//! Finishes a build: resets the ELF input/output of the binary and drops the
//! stored binary when the build failed.
//!
//! \param isBuildGood  true if the build succeeded
//! \return the result of device::Program::finiBuild()
bool
HSAILProgram::finiBuild(bool isBuildGood)
{
    clBinary()->resetElfOut();
    clBinary()->resetElfIn();

    const bool buildFailed = !isBuildGood;
    if (buildFailed) {
        // Prevent the encrypted binary from leaking out
        clBinary()->setBinary(NULL, 0);
    }

    return device::Program::finiBuild(isBuildGood);
}
//! Links the LLVMIR of all \a inputPrograms into this program and continues
//! with the single-program linkImpl().
//!
//! \param inputPrograms  programs to link; must not be empty
//! \param options        options forwarded to linkImpl(options)
//! \param createLibrary  NOTE(review): not consulted here; "-create-library"
//!                       is always passed to aclLink
//! \return false if a binary can't be read, lacks the LLVMIR section, can't
//!         be copied, if aclLink fails, or if the final link step fails
bool
HSAILProgram::linkImpl(
    const std::vector<device::Program *> &inputPrograms,
    amd::option::Options *options,
    bool createLibrary)
{
    acl_error errorCode;
    bool gathered = true;

    // For each program we need to extract the LLVMIR and create
    // aclBinary for each
    std::vector<aclBinary *> binaries_to_link;
    for (std::vector<device::Program *>::const_iterator it
            = inputPrograms.begin(); it != inputPrograms.end(); ++it) {
        HSAILProgram *program = (HSAILProgram *)*it;

        // Check if the program was created with clCreateProgramWIthBinary
        binary_t binary = program->binary();
        if ((binary.first != NULL) && (binary.second > 0)) {
            // Binary already exists -- we can also check if there is no
            // opencl source code
            // Need to check if LLVMIR exists in the binary
            // If LLVMIR does not exist then is it valid
            // We need to pull out all the compiled kernels
            // We cannot do this at present because we need at least
            // Hsail text to pull the kernels out
            void *mem = const_cast<void *>(binary.first);
            binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
            if (errorCode != ACL_SUCCESS) {
                LogWarning("Error while linking : Could not read from raw binary");
                gathered = false;
                break;
            }
        }

        // At this stage each HSAILProgram contains a valid binary_elf
        // Check if LLVMIR is in the binary
        // @TODO - Memory leak , cannot free this buffer
        // need to fix this.. File EPR on compiler library
        size_t llvmirSize = 0;
        aclExtractSection(dev().hsaCompiler(),
            binaryElf_, &llvmirSize, aclLLVMIR, &errorCode);
        if (errorCode != ACL_SUCCESS) {
            buildLog_ += "Error while linking : Invalid binary (Missing LLVMIR section)";
            gathered = false;
            break;
        }

        // Create a new aclBinary for each LLVMIR and save it in a list
        aclBIFVersion ver = aclBinaryVersion(binaryElf_);
        aclBinary *bin = aclCreateFromBinary(binaryElf_, ver);
        if (bin == NULL) {
            buildLog_ += "Error while linking : Failed to copy the binary\n";
            gathered = false;
            break;
        }
        binaries_to_link.push_back(bin);
    }

    // Without any input there is no binary to continue with
    if (gathered && binaries_to_link.empty()) {
        buildLog_ += "Error while linking : No input programs\n";
        gathered = false;
    }

    // At this stage each HSAILProgram in the list has an aclBinary initialized
    // and contains LLVMIR
    // We can now go ahead and link them.
    if (gathered && (binaries_to_link.size() > 1)) {
        errorCode = aclLink(dev().hsaCompiler(),
            binaries_to_link[0], binaries_to_link.size() - 1,
            &binaries_to_link[1], ACL_TYPE_LLVMIR_BINARY, "-create-library", NULL);
        if (errorCode != ACL_SUCCESS) {
            buildLog_ += "Error while linking : Failed to link programs\n";
            gathered = false;
        }
    }

    if (!gathered) {
        // Don't leak the copies created so far
        for (size_t i = 0; i < binaries_to_link.size(); i++) {
            aclBinaryFini(binaries_to_link[i]);
        }
        return false;
    }

    // Store the newly linked aclBinary for this program.
    binaryElf_ = binaries_to_link[0];

    // Free all the other aclBinaries
    for (size_t i = 1; i < binaries_to_link.size(); i++) {
        aclBinaryFini(binaries_to_link[i]);
    }

    // Now call linkImpl with the new options
    return linkImpl(options);
}
//! Inspects the sections of binaryElf_ and decides from which stage the
//! compilation has to continue.
//!
//! \return ACL_TYPE_HSAIL_BINARY when LLVMIR and all of CG, BRIG strtab,
//!         BRIG code and BRIG operands are present;
//!         ACL_TYPE_LLVMIR_BINARY when LLVMIR is present, none of those four
//!         sections is, and there is no ISA (.text) section;
//!         ACL_TYPE_DEFAULT in every other case (messages for missing
//!         sections are appended to the build log where applicable)
aclType
HSAILProgram::getNextCompilationStageFromBinary()
{
    acl_error status;
    size_t    sectionSize = 0;

    // Checking llvmir in .llvmir section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclLLVMIR, &status));
    const bool hasLlvmir = (status == ACL_SUCCESS);

    // Checking compile & link options in .comment section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclCOMMENT, &status));
    const bool hasOptions = (status == ACL_SUCCESS);

    if (!hasLlvmir) {
        buildLog_ += "Error while linking : Invalid binary (Missing LLVMIR section)\n";
        // The missing options section is only reported together with LLVMIR
        if (!hasOptions) {
            buildLog_ += "Warning while linking : Invalid binary (Missing COMMENT section)\n";
        }
        return ACL_TYPE_DEFAULT;
    }

    aclType stage = ACL_TYPE_LLVMIR_BINARY;

    // Checking HSAIL in .cg section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclCODEGEN, &status));
    const bool hasHsail = (status == ACL_SUCCESS);

    // Checking BRIG STRTAB in .brig_strtab section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclBRIGstrs, &status));
    const bool hasStrtab = (status == ACL_SUCCESS);

    // Checking BRIG CODE in .brig_code section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclBRIGcode, &status));
    const bool hasCode = (status == ACL_SUCCESS);

    // Checking BRIG OPERANDS in .brig_operands section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclBRIGoprs, &status));
    const bool hasOperands = (status == ACL_SUCCESS);

    const bool allPresent = hasHsail && hasStrtab && hasCode && hasOperands;
    const bool anyPresent = hasHsail || hasStrtab || hasCode || hasOperands;

    if (allPresent) {
        stage = ACL_TYPE_HSAIL_BINARY;
    }
    else if (anyPresent) {
        // A partial set is invalid: report each missing section
        if (!hasHsail) {
            buildLog_ += "Error while linking : Invalid binary (Missing CG section)\n";
        }
        if (!hasStrtab) {
            buildLog_ += "Error while linking : Invalid binary (Missing BRIG_STRTAB section)\n";
        }
        if (!hasCode) {
            buildLog_ += "Error while linking : Invalid binary (Missing BRIG_CODE section)\n";
        }
        if (!hasOperands) {
            buildLog_ += "Error while linking : Invalid binary (Missing BRIG_OPERANDS section)\n";
        }
        return ACL_TYPE_DEFAULT;
    }

    // Checking ISA in .text section
    static_cast<void>(aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &sectionSize, aclTEXT, &status));
    const bool hasIsa = (status == ACL_SUCCESS);

    // ISA combined with an LLVMIR-only binary is rejected
    if (hasIsa && (stage == ACL_TYPE_LLVMIR_BINARY)) {
        return ACL_TYPE_DEFAULT;
    }
    return stage;
}
//! Brings the program binary to the code generation stage, loads the BRIG
//! globals, creates an HSAILKernel for every kernel found in the HSAIL text
//! and stores the resulting binary.
//!
//! \param options  build options; origOptionStr plus hsailOptions() form the
//!                 option string used for compilation and kernel creation
//! \return false if the binary can't be read or is invalid, compilation or
//!         loading fails, a kernel fails to initialize, the kernel table
//!         can't be allocated, the binary can't be written to memory, or the
//!         HSAIL text is empty; true otherwise
bool
HSAILProgram::linkImpl(amd::option::Options* options)
{
    acl_error errorCode = ACL_SUCCESS;
    aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;

    // If the binaryElf_ is not set then program must have been created
    // using clCreateProgramWithBinary
    if (!binaryElf_) {
        binary_t binary = this->binary();
        if ((binary.first != NULL) && (binary.second > 0)) {
            // Binary already exists -- we can also check if there is no
            // opencl source code
            // Need to check if LLVMIR exists in the binary
            // If LLVMIR does not exist then is it valid
            // We need to pull out all the compiled kernels
            // We cannot do this at present because we need at least
            // Hsail text to pull the kernels out
            void *mem = const_cast<void *>(binary.first);
            binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
            if (errorCode != ACL_SUCCESS) {
                buildLog_ += "Error while converting to BRIG: aclBinary init failure \n" ;
                LogWarning("aclBinaryInit failed");
                return false;
            }

            // Check that all needed section also exist in binaryElf_
            // No any validity checks here
            continueCompileFrom = getNextCompilationStageFromBinary();
            if (ACL_TYPE_DEFAULT == continueCompileFrom) {
                return false;
            }
            if (ACL_TYPE_HSAIL_BINARY == continueCompileFrom) {
                // Save binary in the interface class
                // Also load compile & link options from binary into Program class members:
                // compileOptions_ & linkOptions_
                setBinary(static_cast<char*>(mem), binary.second);

                // Compare options loaded from binary with current ones
                // If they differ then recompile from ACL_TYPE_LLVMIR_BINARY
                // @TODO It is needed to compare options taking into account that:
                // 1. options are order independent;
                // 2. (may be not trivial) compare only options that affect binary
                std::string curOptions = options->origOptionStr + hsailOptions();
                if (compileOptions_ + linkOptions_ != curOptions) {
                    continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
                }
            }
        }
    }

    // Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
    // 1. if the program is not created with binary;
    // 2. if the program is created with binary and contains only .llvmir & .comment
    // 3. if the program is created with binary, contains all brig sections,
    //    but the binary's compile & link options differ from current ones (recompilation);
    if (ACL_TYPE_LLVMIR_BINARY == continueCompileFrom) {
        std::string curOptions = options->origOptionStr + hsailOptions();
        errorCode = aclCompile(dev().hsaCompiler(), binaryElf_,
            curOptions.c_str(), ACL_TYPE_LLVMIR_BINARY, ACL_TYPE_CG, NULL);
    }
    if (errorCode != ACL_SUCCESS) {
        buildLog_ += "Error while converting to BRIG: Compiling LLVMIR to BRIG \n" ;
        return false;
    }

    size_t fsailSize;
    const void *hsailText = aclExtractSection(dev().hsaCompiler(),
        binaryElf_, &fsailSize, aclCODEGEN, &errorCode);
    // A null pointer must never reach the std::string constructor below
    if ((errorCode != ACL_SUCCESS) || (hsailText == NULL)) {
        buildLog_ += "Error while reading out the HSAIL from the ELF" ;
        return false;
    }

    if (!aclHsaLoader(dev().hsaCompiler(), binaryElf_, this, &AllocateGPUMemory,
            &DmaMemoryCopy, &GetSamplerObjectParams, &InitializeSamplerObject)) {
        buildLog_ += "Error while loading BRIG globals in the ELF";
        return false;
    }

    HSAILProgram_ = std::string(static_cast<const char*>(hsailText));

    // We pull out all the kernel names in a very ugly manner
    //! \todo check if this has been fixed in the compiler library
    if (HSAILProgram_.empty()) {
        return false;
    }

    bool dynamicParallelism = false;
    const std::string findString = "kernel &";

    // Find out the name of the kernel. Works for multiple kernels
    size_t pos = 0;
    while (true) {
        const size_t kernelNPos = HSAILProgram_.find(findString, pos);
        if (kernelNPos == std::string::npos) {
            break;
        }
        const size_t kernelEndNPos = HSAILProgram_.find("l(", kernelNPos);
        if (kernelEndNPos == std::string::npos) {
            break;
        }
        pos = kernelEndNPos + 1;

        // "kernel &" is 8
        // "__OpenCL_" is 9
        // "_kerne" is 6
        // We can drop all this with a compiler tweak later
        std::string kernelName = HSAILProgram_.substr(kernelNPos + 8 + 9,
            kernelEndNPos - (kernelNPos + 8 + 9) - 6);

        HSAILKernel *aKernel = new HSAILKernel(kernelName, this,
            options->origOptionStr + hsailOptions());
        if (!aKernel->init()) {
            // Not registered with the program yet, so it has to be
            // released here
            delete aKernel;
            return false;
        }
        aKernel->setUniformWorkGroupSize(options
            ->oVariables->UniformWorkGroupSize);
        kernels()[kernelName] = aKernel;
        dynamicParallelism |= aKernel->dynamicParallelism();

        // Find max scratch regs used in the program
        // It's used for scratch buffer preallocation with
        // dynamic parallelism, since runtime doesn't know
        // which child kernel will be called
        maxScratchRegs_ = std::max(
            static_cast<uint>(aKernel->workGroupInfo()->scratchRegs_),
            maxScratchRegs_);
    }

    // Allocate kernel table for device enqueuing
    if (dynamicParallelism && !allocKernelTable()) {
        return false;
    }

    // Save the binary in the interface class
    size_t size = 0;
    void *mem = NULL;
    if (aclWriteToMem(binaryElf_, &mem, &size) != ACL_SUCCESS) {
        buildLog_ += "Error while writing the binary to memory \n";
        return false;
    }
    setBinary(static_cast<char*>(mem), size);
    buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
    return true;
}
//! This implementation creates no binary; \a options is not used.
//!
//! \return always false
bool HSAILProgram::createBinary(amd::option::Options *options)
{
    return false;
}
//! Creates the ClBinaryHsa object on first use.
//!
//! \return true if the binary object exists after the call
bool
HSAILProgram::initClBinary()
{
    if (clBinary_ != NULL) {
        // Already created
        return true;
    }
    clBinary_ = new ClBinaryHsa(static_cast<const Device &>(device()));
    return clBinary_ != NULL;
}
//! Destroys the binary object, if any, and resets the pointer
void
HSAILProgram::releaseClBinary()
{
    // Deleting a null pointer is a no-op, so no check is required
    delete clBinary_;
    clBinary_ = NULL;
}
//! Builds the device specific part of the compiler option string.
//!
//! \return " -DFP_FAST_FMAF=1" / " -DFP_FAST_FMA=1" when the matching device
//!         settings are set, the LP64_ONLY " -m64" option, and one
//!         " -D<extension>=1" per space separated device extension
std::string
HSAILProgram::hsailOptions()
{
    std::string opts;

    // Set options for the standard device specific options
    // All our devices support these options now
    if (dev().settings().reportFMAF_) {
        opts += " -DFP_FAST_FMAF=1";
    }
    if (dev().settings().reportFMA_) {
        opts += " -DFP_FAST_FMA=1";
    }

    // Check if the host is 64 bit or 32 bit
    LP64_ONLY(opts.append(" -m64"));

    // Append each extension supported by the device
    std::istringstream extensions;
    extensions.str(device().info().extensions_);
    for (std::string ext; std::getline(extensions, ext, ' '); ) {
        // Consecutive spaces produce empty tokens
        if (ext.empty()) {
            continue;
        }
        opts += " -D";
        opts += ext;
        opts += "=1";
    }
    return opts;
}
//! Allocates the kernel table and fills it with the address of every
//! kernel's AQL code, stored at the kernel's own index.
//!
//! \return false if the table memory can't be created or mapped
bool
HSAILProgram::allocKernelTable()
{
    uint size = kernels().size() * sizeof(size_t);

    kernels_ = new gpu::Memory(dev(), size);
    // Initialize kernel table
    if ((kernels_ == NULL) || !kernels_->create(Resource::RemoteUSWC)) {
        delete kernels_;
        // The destructor deletes kernels_ as well, so it must not be left
        // pointing at the object that was just destroyed
        kernels_ = NULL;
        return false;
    }

    size_t* table = reinterpret_cast<size_t*>(
        kernels_->map(NULL, gpu::Resource::WriteOnly));
    if (table == NULL) {
        // Nothing to fill in; kernels_ stays owned by the program
        return false;
    }

    for (auto it = kernels().begin(); it != kernels().end(); ++it) {
        HSAILKernel* kernel = static_cast<HSAILKernel*>(it->second);
        table[kernel->index()] = static_cast<size_t>(
            kernel->gpuAqlCode()->vmAddress());
    }
    kernels_->unmap(NULL);

    return true;
}
//! Appends the AQL code resource of every kernel of the program to \a memList
void
HSAILProgram::fillResListWithKernels(
    std::vector<const Resource*>& memList) const
{
    auto entry = kernels().begin();
    while (entry != kernels().end()) {
        HSAILKernel* kernel = static_cast<HSAILKernel*>(entry->second);
        memList.push_back(kernel->gpuAqlCode());
        ++entry;
    }
}
} // namespace gpu