From 02fbea29d6b980808a85f9d26c86363d184cffa0 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 3 Oct 2019 17:12:38 -0400
Subject: [PATCH] P4 to Git Change 2008325 by gandryey@gera-win10 on 2019/10/03
17:06:49
SWDEV-204999 - [hipclang-vdi-rocm] TF unit test tracking.util_xla_test_gpu fails to run
- Change the HSACO detection logic to use e_machine
- Allow loading a binary that contains no kernels.
Affected files ...
... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#63 edit
---
rocclr/compiler/lib/loaders/elf/elf.hpp | 2 ++
rocclr/runtime/device/devprogram.cpp | 6 ++++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/rocclr/compiler/lib/loaders/elf/elf.hpp b/rocclr/compiler/lib/loaders/elf/elf.hpp
index 3964aa30bb..943bf55bc7 100644
--- a/rocclr/compiler/lib/loaders/elf/elf.hpp
+++ b/rocclr/compiler/lib/loaders/elf/elf.hpp
@@ -334,6 +334,8 @@ public:
const char* getErrMsg() { return _err.getOclElfError(); }
unsigned char getELFClass() { return _eclass; }
+ bool isHsaCo() const { const auto* ehdr = elf64_getehdr(_e); return (ehdr != nullptr) && (ehdr->e_machine == EM_AMDGPU); }  //!< True if e_machine is EM_AMDGPU; a NULL header (not ELF64, or libelf error) is reported as "not HSACO"
+
private:
/* Initialization */
diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp
index bf78847f5c..6fffda2bf1 100644
--- a/rocclr/runtime/device/devprogram.cpp
+++ b/rocclr/runtime/device/devprogram.cpp
@@ -2574,8 +2574,7 @@ bool Program::setBinary(const char* binaryIn, size_t size) {
case ET_DYN: {
char* sect = nullptr;
size_t sz = 0;
- // FIXME: we should look for the e_machine to detect an HSACO.
- if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, &sect, &sz) && sect && sz > 0) {
+ if (clBinary()->elfIn()->isHsaCo()) {
setType(TYPE_EXECUTABLE);
} else {
setType(TYPE_LIBRARY);
@@ -2910,6 +2909,9 @@ bool Program::createKernelMetadataMap() {
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::get_metadata_list_size(kernelsMD, &size);
+ } else if (amd::IS_HIP) {
+ // Assume an empty binary. HIP may have binaries with just global variables
+ return true;
}
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {