From 02fbea29d6b980808a85f9d26c86363d184cffa0 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 3 Oct 2019 17:12:38 -0400 Subject: [PATCH] P4 to Git Change 2008325 by gandryey@gera-win10 on 2019/10/03 17:06:49 SWDEV-204999 - [hipclang-vdi-rocm] TF unit test tracking.util_xla_test_gpu fails to run - Change the HSACO detection logic to use e_machine - Allow to load a binary without any kernel. Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#63 edit --- rocclr/compiler/lib/loaders/elf/elf.hpp | 2 ++ rocclr/runtime/device/devprogram.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rocclr/compiler/lib/loaders/elf/elf.hpp b/rocclr/compiler/lib/loaders/elf/elf.hpp index 3964aa30bb..943bf55bc7 100644 --- a/rocclr/compiler/lib/loaders/elf/elf.hpp +++ b/rocclr/compiler/lib/loaders/elf/elf.hpp @@ -334,6 +334,8 @@ public: const char* getErrMsg() { return _err.getOclElfError(); } unsigned char getELFClass() { return _eclass; } + bool isHsaCo() const { return (elf64_getehdr(_e)->e_machine == EM_AMDGPU); } + private: /* Initialization */ diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp index bf78847f5c..6fffda2bf1 100644 --- a/rocclr/runtime/device/devprogram.cpp +++ b/rocclr/runtime/device/devprogram.cpp @@ -2574,8 +2574,7 @@ bool Program::setBinary(const char* binaryIn, size_t size) { case ET_DYN: { char* sect = nullptr; size_t sz = 0; - // FIXME: we should look for the e_machine to detect an HSACO. 
- if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, &sect, &sz) && sect && sz > 0) { + if (clBinary()->elfIn()->isHsaCo()) { setType(TYPE_EXECUTABLE); } else { setType(TYPE_LIBRARY); @@ -2910,6 +2909,9 @@ bool Program::createKernelMetadataMap() { if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::get_metadata_list_size(kernelsMD, &size); + } else if (amd::IS_HIP) { + // Assume an empty binary. HIP may have binaries with just global variables + return true; } for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {