SWDEV-540645 - use single path for code object loading (#773)

* use single path for code object loading
* Remove printf
* Address review comments
* Fix the declaration of ihipMallocManaged

[ROCm/clr commit: d7f90a3120]
Este commit está contenido en:
Chaudhary, Jatin Jaikishan
2025-08-12 10:45:18 +01:00
cometido por GitHub
padre 0e0ca48a6a
commit 7fe61f9556
Se han modificado 5 ficheros con 503 adiciones y 1201 borrados
+2 -580
Ver fichero
@@ -35,584 +35,6 @@ namespace hip {
hipError_t ihipFree(void* ptr);
// forward declaration of methods required for managed variables
hipError_t ihipMallocManaged(void** ptr, size_t size, size_t align = 0, bool use_host_ptr = 0);
namespace {
// In uncompressed mode
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
sizeof(kOffloadBundleUncompressedMagicStr);
// In compressed mode
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
sizeof(kOffloadBundleCompressedMagicStr);
constexpr char kOffloadKindHip[] = "hip";
constexpr char kOffloadKindHipv4[] = "hipv4";
constexpr char kOffloadKindHcc[] = "hcc";
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
constexpr char kHipFatBinName[] = "hipfatbin";
constexpr char kHipFatBinName_[] = "hipfatbin-";
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
// Clang Offload bundler description & Header in uncompressed mode.
struct __ClangOffloadBundleInfo {
uint64_t offset;
uint64_t size;
uint64_t bundleEntryIdSize;
const char bundleEntryId[1];
};
struct __ClangOffloadBundleUncompressedHeader {
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
uint64_t numOfCodeObjects;
__ClangOffloadBundleInfo desc[1];
};
// Clang Offload bundler description & Header in compressed mode.
struct __ClangOffloadBundleCompressedHeader {
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
uint16_t versionNumber;
uint16_t compressionMethod;
uint32_t totalSize;
uint32_t uncompressedBinarySize;
uint64_t Hash;
const char compressedBinarydesc[1];
};
} // namespace
bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) {
std::string magic(reinterpret_cast<const char*>(data),
kOffloadBundleUncompressedMagicStrSize - 1);
if (!magic.compare(kOffloadBundleUncompressedMagicStr)) {
isCompressed = false;
return true;
}
std::string magic1(reinterpret_cast<const char*>(data),
kOffloadBundleCompressedMagicStrSize - 1);
if (!magic1.compare(kOffloadBundleCompressedMagicStr)) {
isCompressed = true;
return true;
}
return false;
}
uint32_t CodeObject::getGenericVersion(const void* image) {
const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image);
return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA &&
ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ?
((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) : 0;
}
bool CodeObject::isGenericTarget(const void* image) {
return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN;
}
bool CodeObject::containGenericTarget(const void *data) {
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const auto* desc = &obheader->desc[0];
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize)) {
if (desc->size == 0) continue;
const void* image =
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
if (isGenericTarget(image)) {
return true;
}
}
return false;
}
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
// Consumes the string 'consume_' from the starting of the given input
// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa--
// input will become gfx908.
static bool consume(std::string& input, std::string consume_) {
if (input.substr(0, consume_.size()) != consume_) {
return false;
}
input = input.substr(consume_.size());
return true;
}
// Trim String till character, will be used to get gpuname
// example: input is gfx908:sram-ecc+ and trim char is :
// input will become :sram-ecc+.
static std::string trimName(std::string& input, char trim) {
auto pos_ = input.find(trim);
auto res = input;
if (pos_ == std::string::npos) {
input = "";
} else {
res = input.substr(0, pos_);
input = input.substr(pos_);
}
return res;
}
// Trim String till character, will be used to get bundle entry ID.
// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is .
// input will become amdgcn-amd-amdhsa--gfx1035
static bool trimNameTail(std::string& input, char trim) {
auto pos_ = input.rfind(trim);
if (pos_ == std::string::npos) {
return false;
}
input = input.substr(0, pos_);
return true;
}
static char getFeatureValue(std::string& input, std::string feature) {
char res = ' ';
if (consume(input, std::move(feature))) {
res = input[0];
input = input.substr(1);
}
return res;
}
static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value,
char& xnack_value) {
processor = trimName(input, ':');
sramecc_value = getFeatureValue(input, std::string(":sramecc"));
if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false;
xnack_value = getFeatureValue(input, std::string(":xnack"));
if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false;
return true;
}
static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
std::string agent_triple_target_id, unsigned int genericVersion) {
// Primitive Check
if (co_triple_target_id == agent_triple_target_id) return true;
// Parse code object triple target id
if (!consume(co_triple_target_id, std::string(kAmdgcnTargetTriple) + '-')) {
return false;
}
std::string co_processor;
char co_sram_ecc, co_xnack;
if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) {
return false;
}
if (!co_triple_target_id.empty()) return false;
// Parse agent isa triple target id
if (!consume(agent_triple_target_id, std::string(kAmdgcnTargetTriple) + '-')) {
return false;
}
std::string agent_isa_processor;
char isa_sram_ecc, isa_xnack;
if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) {
return false;
}
if (!agent_triple_target_id.empty()) return false;
// Check for compatibility
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
// co_processor is generic target
if (!helpers::IsCompatibleWithGenericTarget(co_processor, agent_isa_processor))
return false;
} else if (agent_isa_processor != co_processor) {
return false;
}
if (co_sram_ecc != ' ') {
if (co_sram_ecc != isa_sram_ecc) return false;
}
if (co_xnack != ' ') {
if (co_xnack != isa_xnack) return false;
}
return true;
}
bool CodeObject::QueryGenericTarget(std::string agentTarget, std::string& processor,
char& sram_ecc, char& xnack) {
static const std::string head = std::string(kAmdgcnTargetTriple) + '-';
// Parse agent isa triple target id
if (!consume(agentTarget, head)) {
return false;
}
if (!getTargetIDValue(agentTarget, processor, sram_ecc, xnack)) {
return false;
}
if (processor.empty()) return false;
auto &map = helpers::GenericTargetMapping();
auto search = map.find(processor);
if (search == map.end()) return false;
processor = head + search->second;
return true;
}
// ================================================================================================
size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) {
if (isCompressed) {
const auto obheader = reinterpret_cast<const __ClangOffloadBundleCompressedHeader*>(data);
return obheader->totalSize;
} else {
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const __ClangOffloadBundleInfo* desc = &obheader->desc[0];
uint64_t i = 0;
while (++i < obheader->numOfCodeObjects) {
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize);
}
return desc->offset + desc->size;
}
}
// ================================================================================================
hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
const void* data, size_t size, const std::vector<std::string>& agent_triple_target_ids,
std::vector<std::pair<const void*, size_t>>& code_objs) {
hipError_t hipStatus = hipSuccess;
amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS;
const size_t num_devices = agent_triple_target_ids.size();
size_t num_code_objs = num_devices;
bool isCompressed = false;
if (!IsClangOffloadMagicBundle(data, isCompressed)) {
ClPrint(amd::LOG_INFO, amd::LOG_COMGR, "IsClangOffloadMagicBundle(%p) return false", data);
// hipModuleLoadData() will possibly call here
return hipErrorInvalidKernelFile;
}
if (size == 0) size = getFatbinSize(data, isCompressed);
amd_comgr_data_t dataCodeObj{0};
amd_comgr_data_set_t dataSetBundled{0};
amd_comgr_data_set_t dataSetUnbundled{0};
amd_comgr_action_info_t actionInfoUnbundle{0};
amd_comgr_data_t item{0};
std::set<std::string> devicesSet{}; // To make sure device is unique
std::set<std::string> genericDevicesSet{}; // Used to record generic targets
std::vector<const char*> bundleEntryIDs{};
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
for (size_t i = 0; i < num_devices; i++) {
auto res = devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
if (res.second) {
// This is a new device in devicesSet
bundleEntryIDs.push_back(res.first->c_str());
std::string processor;
char sram_ecc = ' ', xnack = ' ';
if (!QueryGenericTarget(agent_triple_target_ids[i], processor, sram_ecc, xnack)) {
continue; // No generic target for this device
}
// Now processor is generic such as
// amdgcn-amd-amdhsa--gfx9-4-generic, amdgcn-amd-amdhsa--gfx11-generic
processor = hipv4 + processor;
auto ret = genericDevicesSet.insert(processor);
if (ret.second) {
// Without feature
bundleEntryIDs.push_back(ret.first->c_str());
}
if (xnack != ' ') {
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
if (ret.second) {
// Generic target with xnack feature
bundleEntryIDs.push_back(ret.first->c_str());
}
}
if (sram_ecc != ' ') {
processor += ":sramecc";
processor += sram_ecc;
ret = genericDevicesSet.insert(processor);
if (ret.second) {
// Generic target with sramecc feature
bundleEntryIDs.push_back(ret.first->c_str());
}
if (xnack != ' ') {
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
if (ret.second) {
// Generic target with sramecc and xnack features
bundleEntryIDs.push_back(ret.first->c_str());
}
}
}
}
}
do {
// Create Bundled dataset
comgrStatus = amd::Comgr::create_data_set(&dataSetBundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// CodeObject
comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError(
"amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast<const char*>(data));
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError(
"amd::Comgr::set_data_name("
") failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Set up ActionInfo
comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids(
actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size());
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError(
"amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh",
bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Unbundle
comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle,
dataSetBundled, dataSetUnbundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Check CodeObject count
size_t count = 0;
comgrStatus =
amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Initialize Code objects
code_objs.reserve(num_code_objs);
for (size_t i = 0; i < num_code_objs; i++) {
code_objs.push_back(std::make_pair(nullptr, 0));
}
for (size_t i = 0; i < count; i++) {
if (num_code_objs == 0) break;
size_t itemSize = 0;
comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled,
AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
std::string bundleEntryId(itemSize, 0);
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data());
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count,
itemSize, comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "Found bundleEntryId=%s", bundleEntryId.c_str());
// Remove bundleEntryId_
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
// This is behavour in comgr unbundling which is subject to change.
// So just give info.
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
"bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(),
kOffloadHipV4FatBinName_);
}
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
// Currently we only support EF_AMDGPU_GENERIC_VERSION_MIN on generic target
uint32_t genericVersion =
bundleEntryId.find("generic") != bundleEntryId.npos ? EF_AMDGPU_GENERIC_VERSION_MIN : 0;
char* itemData = nullptr;
for (size_t dev = 0; dev < num_devices; ++dev) {
if (code_objs[dev].first != nullptr) {
if (!isGenericTarget(code_objs[dev].first)) {
continue; // Specific target already found
} else if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
continue; // Generic target already found, no need to check another generic
}
}
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "agent_triple_target_ids[%zu]=%s, bundleEntryId=%s",
dev, agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
if (isCodeObjectCompatibleWithDevice(bundleEntryId, agent_triple_target_ids[dev],
genericVersion)) {
if (itemData == nullptr) {
itemSize = 0;
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
if (itemSize == 0) {
// If there isn't a code object for this device,
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
// valid name but no data. We need continue searching for other devices
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev,
agent_triple_target_ids[dev].c_str());
break;
}
// itemData should be deleted in fatbin's destructor
itemData = new char[itemSize];
if (itemData == nullptr) {
LogError("no enough memory");
hipStatus = hipErrorOutOfMemory;
break;
}
comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count,
itemSize, comgrStatus);
hipStatus = hipErrorInvalidValue;
delete[] itemData;
itemData = nullptr;
break;
}
}
if (code_objs[dev].first != nullptr) {
// This must be data of generic target
bool used = false; // Still used by other devices?
for (size_t i = 0; i < num_devices; ++i) {
if (dev != i && code_objs[dev].first == code_objs[i].first) {
used = true;
break;
}
}
if (!used) {
delete[] reinterpret_cast<const char*>(code_objs[dev].first);
}
} else {
--num_code_objs;
}
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR,
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s, %s), "
"Size=%zu, num_code_objs=%zu",
dev, agent_triple_target_ids[dev].c_str(), itemData,
isCompressed ? "compressed" : "uncompressed",
genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN ? "generic" : "non-generic",
itemSize, num_code_objs);
}
}
comgrStatus = amd::Comgr::release_data(item);
item.handle = 0;
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
}
if (hipStatus != hipSuccess) break;
}
} while (0);
// Cleanup
if (actionInfoUnbundle.handle) {
comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (dataSetBundled.handle) {
comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (dataSetUnbundled.handle) {
comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (dataCodeObj.handle) {
comgrStatus = amd::Comgr::release_data(dataCodeObj);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (item.handle) {
comgrStatus = amd::Comgr::release_data(item);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
return hipStatus;
}
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
amd::ScopedLock lock(dclock_);
@@ -620,7 +42,7 @@ hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
// Number of devices = 1 in dynamic code object
fb_info_ = new FatBinaryInfo(fname, image);
std::vector<hip::Device*> devices = {g_devices[ihipGetDevice()]};
IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices));
IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinaryUsingCOMGR(devices));
// No Lazy loading for DynCO
IHIP_RETURN_ONFAIL(fb_info_->BuildProgram(ihipGetDevice()));
@@ -834,7 +256,7 @@ hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) {
// Create a new fat binary object and extract the fat binary for all devices.
FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo(nullptr, data);
hipError_t err = fatBinaryInfo->ExtractFatBinary(g_devices);
hipError_t err = fatBinaryInfo->ExtractFatBinaryUsingCOMGR(g_devices);
programs = fatBinaryInfo;
return err;
}
+47 -45
Ver fichero
@@ -35,6 +35,52 @@ THE SOFTWARE.
#include "platform/program.hpp"
namespace hip {
namespace symbols {
// In uncompressed mode
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
sizeof(kOffloadBundleUncompressedMagicStr);
// In compressed mode
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
sizeof(kOffloadBundleCompressedMagicStr);
constexpr char kOffloadKindHip[] = "hip";
constexpr char kOffloadKindHipv4[] = "hipv4";
constexpr char kOffloadKindHcc[] = "hcc";
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
constexpr char kHipFatBinName[] = "hipfatbin";
constexpr char kHipFatBinName_[] = "hipfatbin-";
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
// Clang Offload bundler description & Header in uncompressed mode.
struct ClangOffloadBundleInfo {
uint64_t offset;
uint64_t size;
uint64_t bundleEntryIdSize;
const char bundleEntryId[1];
};
struct ClangOffloadBundleUncompressedHeader {
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
uint64_t numOfCodeObjects;
ClangOffloadBundleInfo desc[1];
};
// Clang Offload bundler description & Header in compressed mode.
struct ClangOffloadBundleCompressedHeader {
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
uint16_t versionNumber;
uint16_t compressionMethod;
uint32_t totalSize;
uint32_t uncompressedBinarySize;
uint64_t Hash;
const char compressedBinarydesc[1];
};
} // namespace symbols
//Forward Declaration for friend usage
class PlatformState;
@@ -43,50 +89,6 @@ class CodeObject {
public:
virtual ~CodeObject() {}
// Functions to add_dev_prog and build
static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr,
size_t binary_size);
static hipError_t build_module(hipModule_t hmod, const std::vector<amd::Device*>& devices);
static uint64_t ElfSize(const void* emi);
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
static uint32_t getGenericVersion(const void* image);
static bool isGenericTarget(const void* image);
static bool containGenericTarget(const void *data);
// Return size of fat bin
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
/**
* @brief Extract code object from fatbin using comgr unbundling action
*
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
* compressed and even SPIR-V(to be supported later) mode.
* @param[in] size the size of the bundle data
* @param[in] agent_triple_target_ids isa names of concerned devices
* @param[out] code_objs the buffer address and size pairs of extracted code objects of
* concerned devices
* Returned error code
*
* @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue,
* #hipErrorNoBinaryForGpu
*
* @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
* const std::vector<hip::Device*>& devices)
*/
static hipError_t extractCodeObjectFromFatBinaryUsingComgr(
const void* data, size_t size, const std::vector<std::string>& devices,
std::vector<std::pair<const void*, size_t>>& code_objs);
// Query the generic target of agent target.
// Return true on successfull query, false on failure
static bool QueryGenericTarget(std::string agentTarget, std::string& processor, char& sram_ecc,
char& xnack);
protected:
CodeObject() {}
@@ -185,6 +187,6 @@ private:
std::unordered_map<int, bool> managedVarsDevicePtrInitalized_;
};
}; // namespace hip
}; // namespace hip
#endif /* HIP_CODE_OBJECT_HPP */
+1 -234
Ver fichero
@@ -21,6 +21,7 @@ THE SOFTWARE.
*/
#define LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING 1
#include "llvm/BinaryFormat/ELF.h"
#if defined(_WIN32)
#include <io.h>
#if defined(__has_attribute)
@@ -32,7 +33,6 @@ THE SOFTWARE.
using namespace llvm::ELF;
namespace hip {
std::unordered_set<LinkProgram*> LinkProgram::linker_set_;
namespace helpers {
@@ -64,239 +64,6 @@ struct __ClangOffloadBundleHeader {
__ClangOffloadBundleInfo desc[1];
};
static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported,
bool& sramEccSupported) {
switch (EFlags & EF_AMDGPU_MACH) {
case EF_AMDGPU_MACH_AMDGCN_GFX700:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx700";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX701:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx701";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX702:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx702";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX703:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx703";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX704:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx704";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX705:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx705";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX801:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx801";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX802:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx802";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX803:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx803";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX805:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx805";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX810:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx810";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX900:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx900";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX902:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx902";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX904:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx904";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX906:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx906";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX908:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx908";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX909:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx909";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX90A:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx90a";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX90C:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx90c";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX942:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx942";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX950:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx950";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1010:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx1010";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1011:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx1011";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1012:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx1012";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1013:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx1013";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1030";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1031:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1031";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1032:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1032";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1033:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1033";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1034:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1034";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1035:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1035";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1036:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1036";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1100:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1100";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1101:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1101";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1102:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1102";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1103:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1103";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1150:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1150";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1151:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1151";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1200:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1200";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX1201:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1201";
case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx9-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx10-1-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx10-3-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx11-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx12-generic";
break;
default:
return false;
}
return true;
}
// Consumes the string 'consume_' from the starting of the given input
// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa--
// input will become gfx908.
+452 -322
Ver fichero
@@ -20,7 +20,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime_api.h"
#include "llvm/BinaryFormat/ELF.h"
#include "hip_fatbin.hpp"
#include "hip_global.hpp"
#include <unordered_map>
#include "hip_code_object.hpp"
@@ -28,7 +32,6 @@ THE SOFTWARE.
#include "comgrctx.hpp"
namespace hip {
namespace comgr_helper {
template <typename comgr_T> class ComgrUniqueHandle {
@@ -154,30 +157,312 @@ void ListAllDeviceWithNoCOFromBundle(
}
}
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
bool &containGenericTarget) {
amd_comgr_data_t data_object {0};
amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS;
hipError_t hip_status = hipSuccess;
static std::string TargetGenericMap(const std::string& input) {
const static std::unordered_map<std::string, std::string> target_map{
// clang-format off
{"amdgcn-amd-amdhsa--gfx900" , "amdgcn-amd-amdhsa--gfx9-generic" },
{"amdgcn-amd-amdhsa--gfx902" , "amdgcn-amd-amdhsa--gfx9-generic" },
{"amdgcn-amd-amdhsa--gfx904" , "amdgcn-amd-amdhsa--gfx9-generic" },
{"amdgcn-amd-amdhsa--gfx906" , "amdgcn-amd-amdhsa--gfx9-generic" },
{"amdgcn-amd-amdhsa--gfx909" , "amdgcn-amd-amdhsa--gfx9-generic" },
{"amdgcn-amd-amdhsa--gfx90c" , "amdgcn-amd-amdhsa--gfx9-generic" },
{"amdgcn-amd-amdhsa--gfx942" , "amdgcn-amd-amdhsa--gfx9-4-generic" },
{"amdgcn-amd-amdhsa--gfx950" , "amdgcn-amd-amdhsa--gfx9-4-generic" },
{"amdgcn-amd-amdhsa--gfx1010", "amdgcn-amd-amdhsa--gfx10-1-generic"},
{"amdgcn-amd-amdhsa--gfx1011", "amdgcn-amd-amdhsa--gfx10-1-generic"},
{"amdgcn-amd-amdhsa--gfx1012", "amdgcn-amd-amdhsa--gfx10-1-generic"},
{"amdgcn-amd-amdhsa--gfx1013", "amdgcn-amd-amdhsa--gfx10-1-generic"},
{"amdgcn-amd-amdhsa--gfx1030", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1031", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1032", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1033", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1034", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1035", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1036", "amdgcn-amd-amdhsa--gfx10-3-generic"},
{"amdgcn-amd-amdhsa--gfx1100", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1101", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1102", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1103", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1150", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1151", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1152", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1153", "amdgcn-amd-amdhsa--gfx11-generic" },
{"amdgcn-amd-amdhsa--gfx1200", "amdgcn-amd-amdhsa--gfx12-generic" },
{"amdgcn-amd-amdhsa--gfx1201", "amdgcn-amd-amdhsa--gfx12-generic" },
// clang-format on
};
if (auto i = target_map.find(input); i != target_map.end()) {
return i->second;
}
return {};
}
// If image was passed as a pointer to our hipMod* api, we can try to extract the file name
// if it was mapped by the app. Otherwise use the COMGR data API.
if (fname_.size() == 0) {
if (image_ == nullptr) {
LogError("Both Filename and image cannot be null");
return hipErrorInvalidValue;
// For sramecc and xnack
static std::string TargetFeatureCheck(const std::string& input, std::string feature) {
if (input.find(feature) != std::string::npos) {
auto feature_p = feature + "+"; // feature present eg: xnack+
auto feature_m = feature + "-"; // feature absent eg: xnack-
if (input.find(feature_p) != std::string::npos) {
return feature_p;
} else if (input.find(feature_m) != std::string::npos) {
return feature_m;
}
}
return "";
}
static std::string TargetToGeneric(std::string input) {
auto sramecc = TargetFeatureCheck(input, "sramecc");
auto xnack = TargetFeatureCheck(input, "xnack");
// Remove all features
size_t index = input.find_first_of(":");
std::string name_without_feature = input.substr(0, index);
// Look up generic name
auto generic_name = TargetGenericMap(name_without_feature);
if (generic_name.empty()) {
return generic_name; // No generic exists
}
// reappend feature
if (!sramecc.empty()) {
generic_name += ":";
generic_name += sramecc;
}
if (!xnack.empty()) {
generic_name += ":";
generic_name += xnack;
}
return generic_name;
}
static bool IsCodeObjectUncompressed(const void* image) {
return std::memcmp(image,
reinterpret_cast<const void*>(symbols::kOffloadBundleUncompressedMagicStr),
sizeof(symbols::kOffloadBundleUncompressedMagicStr) - 1) == 0;
}
static bool IsCodeObjectCompressed(const void* image) {
return std::memcmp(image,
reinterpret_cast<const void*>(symbols::kOffloadBundleCompressedMagicStr),
sizeof(symbols::kOffloadBundleCompressedMagicStr) - 1) == 0;
}
static bool IsCodeObjectElf(const void* image) {
const amd::Elf64_Ehdr* ehdr = reinterpret_cast<const amd::Elf64_Ehdr*>(image);
return ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == llvm::ELF::ELFOSABI_AMDGPU_HSA;
}
static bool UncompressAndPopulateCodeObject(
const void* image, const std::set<std::string>& unique_isa_names,
std::map<std::string, std::pair<const void*, size_t>>& code_obj_map) {
auto remove_file_extension = [](const std::string& input) -> std::string {
size_t index = input.find_last_of(".");
std::string ret = input.substr(0, index);
return ret;
};
std::vector<std::string> bundle_ids_str;
std::set<std::string> unique_ids;
for (const auto& isa_name : unique_isa_names) {
bundle_ids_str.push_back(std::string(symbols::kOffloadKindHipv4_) + isa_name);
}
std::vector<const char*> bundle_ids;
bundle_ids.reserve(bundle_ids_str.size());
for (auto& bundle_id_str : bundle_ids_str) {
bundle_ids.push_back(bundle_id_str.c_str());
}
const auto obheader =
reinterpret_cast<const symbols::ClangOffloadBundleCompressedHeader*>(image);
const size_t size = obheader->totalSize;
bool passed = false;
do {
comgr_helper::ComgrDataSetUniqueHandle bundled_co, unbundled_co;
comgr_helper::ComgrDataUniqueHandle input_bundle;
if (auto comgr_status = bundled_co.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in creating bundled_co");
break;
}
if (auto comgr_status = unbundled_co.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in creating unbundled_co");
break;
}
if (auto comgr_status = input_bundle.Create(AMD_COMGR_DATA_KIND_OBJ_BUNDLE);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in creating input bundle");
break;
}
if (auto comgr_status =
amd::Comgr::set_data(input_bundle.get(), size, static_cast<const char*>(image));
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in setting image data to bundle");
break;
}
if (auto comgr_status = amd::Comgr::set_data_name(input_bundle.get(), symbols::kHipFatBinName);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in setting image data to bundle");
break;
}
if (auto comgr_status = amd::Comgr::data_set_add(bundled_co.get(), input_bundle.get());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in adding data set");
break;
}
comgr_helper::ComgrActionInfoUniqueHandle unbundle_action;
if (auto comgr_status = unbundle_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in creating unbundle action");
break;
}
if (auto comgr_status = amd::Comgr::action_info_set_bundle_entry_ids(
unbundle_action.get(), bundle_ids.data(), bundle_ids.size());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Error in setting bundle entry ids");
break;
}
if (auto comgr_status = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, unbundle_action.get(),
bundled_co.get(), unbundled_co.get());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to unbundle code object");
break;
}
size_t count = 0;
if (auto comgr_status = amd::Comgr::action_data_count(unbundled_co.get(),
AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get data count of unbundled code object");
break;
}
for (size_t i = 0; i < count; i++) {
amd_comgr_data_t item;
if (auto comgr_status = amd::Comgr::action_data_get_data(
unbundled_co.get(), AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get data unbundled code object");
break;
}
size_t item_name_size = 0;
if (auto comgr_status = amd::Comgr::get_data_name(item, &item_name_size, nullptr);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get data size");
break;
}
std::string item_bundle_id(item_name_size, 0);
if (auto comgr_status =
amd::Comgr::get_data_name(item, &item_name_size, item_bundle_id.data());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get data");
break;
}
size_t item_size = 0;
if (auto comgr_status = amd::Comgr::get_data(item, &item_size, nullptr);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get data size");
break;
}
if (item_size > 0) {
char* item_data = new char[item_size];
if (auto comgr_status = amd::Comgr::get_data(item, &item_size, item_data);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get data");
break;
}
std::string bundle_entry = remove_file_extension(
std::string(item_bundle_id.c_str() + sizeof(symbols::kOffloadHipV4FatBinName_) - 1));
LogPrintfInfo("Inserting bundle entry of %s : size: %d, data: %p", bundle_entry.c_str(),
item_size, item_data);
code_obj_map[bundle_entry] = std::make_pair(item_data, item_size);
}
}
passed = true;
} while (0);
return passed;
}
static bool PopulateCodeObjectMap(
const void* image, const std::set<std::string>& unique_isa_names,
std::map<std::string, std::pair<const void*, size_t>>& code_obj_map) {
bool passed = false;
do {
comgr_helper::ComgrDataUniqueHandle data_object;
if (auto comgr_status = data_object.Create(AMD_COMGR_DATA_KIND_FATBIN);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Creating data object failed with status %d ", comgr_status);
break;
}
// There is no way to find size of offload bundle, so we pass 4096 here.
if (auto comgr_status =
amd::Comgr::set_data(data_object.get(), 4096, reinterpret_cast<const char*>(image));
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
break;
}
// Create a query list using COMGR info for unique ISAs.
std::vector<amd_comgr_code_object_info_t> query_list_array;
query_list_array.reserve(unique_isa_names.size());
for (const auto& isa_name : unique_isa_names) {
auto& item = query_list_array.emplace_back();
item.isa = isa_name.c_str();
item.size = 0;
item.offset = 0;
}
// Look up the code object info passing the query list.
if (auto comgr_status = amd::Comgr::lookup_code_object(
data_object.get(), query_list_array.data(), unique_isa_names.size());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
break;
}
for (const auto& item : query_list_array) {
if (item.size > 0) {
char* d = new char[item.size];
std::memcpy(reinterpret_cast<void*>(d), reinterpret_cast<const char*>(image) + item.offset,
item.size);
code_obj_map[item.isa] = std::make_pair(d, item.size);
}
}
passed = true;
} while (0);
return passed;
}
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices) {
if (fname_.empty() && image_ == nullptr) {
LogError("Both Filename and image cannot be null");
return hipErrorInvalidValue;
}
if (image_ != nullptr) {
if (!amd::Os::FindFileNameFromAddress(image_, &fname_, &foffset_)) {
fname_ = std::string("");
foffset_ = 0;
}
}
// If file name & path are available (or it is passed to you), then get the file desc to use
// COMGR file slice APIs.
if (image_ == nullptr && fname_.size() > 0) {
// Get File Handle & size of the file.
} else {
ufd_ = PlatformState::instance().GetUniqueFileHandle(fname_.c_str());
if (ufd_ == nullptr) {
return hipErrorFileNotFound;
@@ -197,264 +482,183 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
image_mapped_ = true;
}
// At this line, image should be a valid ptr.
guarantee(image_ != nullptr, "Image cannot be nullptr, file:%s did not map for some reason",
fname_.c_str());
bool is_compressed = IsCodeObjectCompressed(image_),
is_uncompressed = IsCodeObjectUncompressed(image_);
// It better be elf if its neither compressed nor uncompressed
if (!is_compressed && !is_uncompressed) {
if (IsCodeObjectElf(image_)) {
// Load the binary directly
auto elf_size = amd::Elf::getElfSize(image_);
for (size_t i = 0; i < devices.size(); i++) {
if (hipSuccess != AddDevProgram(devices[i], image_, elf_size, 0))
return hipErrorInvalidImage;
}
return hipSuccess; // We are done since it was already ELF
} else {
LogError("The code object has invalid header: compressed, uncompressed or elf");
return hipErrorInvalidImage;
}
}
// Create a list of all targets, which the current device can run
// For example, gfx1030 can run gfx1030, gfx10-geneeric, amdgcnspirv
std::set<std::string> unique_isa_names;
const std::string spirv_isa_name{"spirv64-amd-amdhsa--amdgcnspirv"};
unique_isa_names.insert(spirv_isa_name); // Insert SPIRV ISA name
for (auto device : devices) {
std::string device_name = device->devices()[0]->isa().isaName();
unique_isa_names.insert(device_name);
auto generic_name = TargetToGeneric(device_name);
LogPrintfInfo("Looking up generic name of : %s - %s", device_name.c_str(),
generic_name.c_str());
if (!generic_name.empty()) {
unique_isa_names.insert(generic_name);
}
}
std::map<std::string, std::pair<const void*, size_t>> code_obj_map; //!< code object map
if (is_compressed) {
if (!UncompressAndPopulateCodeObject(image_, unique_isa_names, code_obj_map)) {
return hipErrorInvalidImage;
}
} else { // uncompressed code object
if (!PopulateCodeObjectMap(image_, unique_isa_names, code_obj_map)) {
return hipErrorInvalidImage;
}
}
hipError_t hip_status = hipErrorInvalidImage;
do {
bool isCompressed = false;
// If the image ptr is not clang offload bundle then just directly point the image.
if (!CodeObject::IsClangOffloadMagicBundle(image_, isCompressed)) {
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
uint64_t elf_size = CodeObject::ElfSize(image_);
if (elf_size == 0) {
hip_status = hipErrorInvalidImage;
break;
}
hip_status = AddDevProgram(devices[dev_idx], image_, elf_size, 0);
bool spirv_isa_found = code_obj_map.find(spirv_isa_name) != code_obj_map.end();
for (auto device : devices) {
std::string device_name = device->devices()[0]->isa().isaName();
auto generic_target_name = TargetToGeneric(device_name); // Generic Code Object
auto native_co = code_obj_map.find(device_name); // Native Code Object
auto generic_co = code_obj_map.find(generic_target_name); // generic Code Object
LogPrintfInfo("Device name: %s Generic name: %s", device_name.c_str(),
generic_target_name.c_str());
// If the size is not 0, that means we found the native isa code object
if (native_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) {
LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId());
// We need to do this because there is existing mechanism which deletes code object in
// destructor. Ideally next set of refactor should sort it.
char* co = new char[native_co->second.second];
std::memcpy(co, reinterpret_cast<const char*>(native_co->second.first),
native_co->second.second);
hip_status = AddDevProgram(device, co, native_co->second.second, 0);
if (hip_status != hipSuccess) {
break;
}
}
break;
}
if (!isCompressed) {
if (CodeObject::containGenericTarget(image_)) {
LogInfo("offload bundle contains generic target code object");
containGenericTarget = true;
}
}
if (isCompressed || containGenericTarget) {
size_t major = 0, minor = 0;
amd::Comgr::get_version(&major, &minor);
if ((major == 2 && minor >= 8) || major > 2) {
hip_status = ExtractFatBinaryUsingCOMGR(image_, devices);
break;
} else if (isCompressed) {
LogPrintfError("comgr %zu.%zu cannot support compressed mode which requires comgr 2.8+",
major, minor);
hip_status = hipErrorNotSupported;
break;
}
}
// Create a data object, if it fails return error
if ((comgr_status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)) !=
AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Creating data object failed with status %d ", comgr_status);
hip_status = hipErrorInvalidValue;
break;
}
if (ufd_ != nullptr && amd::Os::isValidFileDesc(ufd_->fdesc_)) {
LogPrintfError("Have valid file!%d", ufd_->fdesc_);
}
if (image_ != nullptr) {
// Using the image ptr, map the data object.
if ((comgr_status =
amd::Comgr::set_data(data_object, 4096, reinterpret_cast<const char*>(image_))) !=
AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
hip_status = hipErrorInvalidValue;
break;
}
} else {
guarantee(false, "Cannot have both fname_ and image_ as nullptr");
}
// Find the unique number of ISAs needed for this COMGR query.
std::unordered_map<std::string, std::pair<size_t, size_t>> unique_isa_names;
for (auto device : devices) {
std::string device_name = device->devices()[0]->isa().isaName();
unique_isa_names.insert({device_name, std::make_pair<size_t, size_t>(0, 0)});
}
// there are two spirv targets, spirv64-amd-amdhsa--amdgcnspirv and
// spirv64-amd-amdhsa-unknown-amdgcnspirv.
// eventually we will remove spirv64-amd-amdhsa--amdgcnspirv
const std::vector<std::string> spirv_isa_names = {"spirv64-amd-amdhsa--amdgcnspirv",
"spirv64-amd-amdhsa-unknown-amdgcnspirv"};
for (const auto& spirv_isa_name : spirv_isa_names) {
unique_isa_names.insert({spirv_isa_name, std::make_pair<size_t, size_t>(0, 0)});
}
// Create a query list using COMGR info for unique ISAs.
std::vector<amd_comgr_code_object_info_t> query_list_array;
query_list_array.reserve(unique_isa_names.size());
for (const auto& isa_name : unique_isa_names) {
auto& item = query_list_array.emplace_back();
item.isa = isa_name.first.c_str();
item.size = 0;
item.offset = 0;
}
// Look up the code object info passing the query list.
if ((comgr_status = amd::Comgr::lookup_code_object(data_object, query_list_array.data(),
unique_isa_names.size())) !=
AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
hip_status = hipErrorInvalidValue;
break;
}
for (const auto& item : query_list_array) {
auto unique_it = unique_isa_names.find(item.isa);
guarantee(unique_isa_names.cend() != unique_it, "Cannot find unique isa ");
unique_it->second = std::pair<size_t, size_t>(static_cast<size_t>(item.size),
static_cast<size_t>(item.offset));
}
bool spirv_isa_found = false;
decltype(unique_isa_names.begin()) spirv_isa_handle;
for (const auto& spirv_isa_name : spirv_isa_names) {
auto iter = unique_isa_names.find(spirv_isa_name);
if (iter->second.first != 0) {
spirv_isa_found = true;
spirv_isa_handle = iter;
}
}
bool get_spirv_data_res = false;
std::once_flag get_spirv_data_flag;
std::unordered_map<std::string, std::pair<char*, size_t>> compiled_co; // code object cache
comgr_helper::ComgrDataSetUniqueHandle spirv_data_set;
comgr_helper::ComgrDataUniqueHandle spirv_data;
auto get_spirv_data = [&]() {
if (comgr_status = spirv_data_set.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create SPIRV Data set");
return;
}
if (comgr_status = spirv_data.Create(AMD_COMGR_DATA_KIND_SPIRV);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create SPIRV Data");
return;
}
if (comgr_status =
amd::Comgr::set_data(spirv_data.get(), spirv_isa_handle->second.first /* size */,
reinterpret_cast<char*>(const_cast<void*>(image_)) +
spirv_isa_handle->second.second /* buffer */);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to assign data in comgr");
return;
}
if (comgr_status = amd::Comgr::set_data_name(spirv_data.get(), "hip_code_object.spv");
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set data name");
return;
}
if (comgr_status = amd::Comgr::data_set_add(spirv_data_set.get(), spirv_data.get());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to add spir data");
return;
}
get_spirv_data_res = true;
};
LogPrintfInfo("Searching for code objects, HIP_FORCE_SPIRV_CODEOBJECT: %d",
HIP_FORCE_SPIRV_CODEOBJECT);
for (auto device : devices) {
std::string device_name = device->devices()[0]->isa().isaName();
auto dev_it = unique_isa_names.find(device_name);
// If the size is not 0, that means we found the native isa code object
if (dev_it->second.first != 0 && !HIP_FORCE_SPIRV_CODEOBJECT) {
LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId());
guarantee(unique_isa_names.cend() != dev_it,
"Cannot find the device name in the unique device name");
hip_status = AddDevProgram(
device, reinterpret_cast<address>(const_cast<void*>(image_)) + dev_it->second.second,
dev_it->second.first, dev_it->second.second);
} else if (generic_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) {
LogPrintfInfo("Using Generic code object: %s : %s", device->devices()[0]->isa().targetId(),
generic_target_name.c_str());
char* co = new char[generic_co->second.second];
std::memcpy(co, reinterpret_cast<const char*>(generic_co->second.first),
generic_co->second.second);
hip_status = AddDevProgram(device, co, generic_co->second.second, 0);
if (hip_status != hipSuccess) {
break;
}
} else if (spirv_isa_found) {
std::call_once(get_spirv_data_flag, get_spirv_data);
if(!get_spirv_data_res) {
hip_status = hipErrorInvalidValue;
break;
}
std::string target_id = device->devices()[0]->isa().targetId();
if (auto code_iter = compiled_co.find(target_id); code_iter != compiled_co.end()) {
// We have already compiled for it, lets reuse the code object
char* co = new char[code_iter->second.second];
std::memcpy(co, code_iter->second.first, code_iter->second.second);
LogPrintfInfo("reusing code object for: %s", target_id.c_str());
hip_status = AddDevProgram(device, co, code_iter->second.second, 0);
if (hip_status != hipSuccess) {
break;
}
continue;
}
LogPrintfInfo("Creating ISA for: %s from spirv", target_id.c_str());
comgr_helper::ComgrActionInfoUniqueHandle reloc_action;
std::string isa = "amdgcn-amd-amdhsa--" + target_id;
if (comgr_status = reloc_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create action");
break;
}
if (comgr_status = amd::Comgr::action_info_set_isa_name(reloc_action.get(), isa.c_str());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set ISA name");
break;
}
if (comgr_status = amd::Comgr::action_info_set_device_lib_linking(reloc_action.get(), true);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set device lib linking");
break;
}
if (comgr_status = amd::Comgr::action_info_set_option_list(
reloc_action.get(), nullptr /* options list */, 0 /* options size */);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set option list");
break;
}
LogPrintfInfo("Creating ISA for: %s from spirv", target_id.c_str());
comgr_helper::ComgrDataSetUniqueHandle spirv_data_set;
comgr_helper::ComgrDataSetUniqueHandle reloc_data;
if (comgr_status = reloc_data.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create reloc data set");
comgr_helper::ComgrDataUniqueHandle spirv_data;
comgr_helper::ComgrActionInfoUniqueHandle reloc_action;
if (auto comgr_status = spirv_data_set.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create SPIRV Data set");
break;
}
if (comgr_status =
if (auto comgr_status = spirv_data.Create(AMD_COMGR_DATA_KIND_SPIRV);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create SPIRV Data");
break;
}
auto spirv_isa_handle = code_obj_map.find(spirv_isa_name);
if (auto comgr_status =
amd::Comgr::set_data(spirv_data.get(), spirv_isa_handle->second.second /* size */,
reinterpret_cast<const char*>(spirv_isa_handle->second.first)
/* buffer */);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to assign SPIRV data");
break;
}
if (auto comgr_status = amd::Comgr::set_data_name(spirv_data.get(), "hip_code_object.spv");
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set spirv data's name");
break;
}
if (auto comgr_status = amd::Comgr::data_set_add(spirv_data_set.get(), spirv_data.get());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to add spir data to data set");
break;
}
if (auto comgr_status = reloc_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create reloc action");
break;
}
if (auto comgr_status =
amd::Comgr::action_info_set_isa_name(reloc_action.get(), isa.c_str());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set reloc action's isa name");
break;
}
if (auto comgr_status = reloc_data.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create reloc data");
break;
}
if (auto comgr_status =
amd::Comgr::action_info_set_device_lib_linking(reloc_action.get(), true);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set device lib linking for reloc action");
break;
}
if (auto comgr_status =
amd::Comgr::do_action(AMD_COMGR_ACTION_COMPILE_SPIRV_TO_RELOCATABLE,
reloc_action.get(), spirv_data_set.get(), reloc_data.get());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to compile to reloc");
LogError("Failed to do action: codegen bc ot reloc");
LogError("Failed to compile spirv to reloc");
break;
}
comgr_helper::ComgrActionInfoUniqueHandle exe_action;
comgr_helper::ComgrDataSetUniqueHandle exe_output;
if (comgr_status = exe_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create action");
if (auto comgr_status = exe_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create exe action");
break;
}
if (comgr_status = amd::Comgr::action_info_set_isa_name(exe_action.get(), isa.c_str());
if (auto comgr_status = amd::Comgr::action_info_set_isa_name(exe_action.get(), isa.c_str());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to set exe action isa name");
break;
}
if (comgr_status = exe_output.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
if (auto comgr_status = exe_output.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to create exe output");
break;
}
if (comgr_status =
if (auto comgr_status =
amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
exe_action.get(), reloc_data.get(), exe_output.get());
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
@@ -463,74 +667,51 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
}
amd_comgr_data_t exe_data_handle;
if (comgr_status = amd::Comgr::action_data_get_data(
if (auto comgr_status = amd::Comgr::action_data_get_data(
exe_output.get(), AMD_COMGR_DATA_KIND_EXECUTABLE, 0, &exe_data_handle);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to action get exe data");
LogError("Failed to get exe data");
break;
}
// Move ownership of exe_data_handle to exe_data
comgr_helper::ComgrDataUniqueHandle exe_data(exe_data_handle);
size_t co_size;
if (comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, NULL);
size_t co_size = 0;
if (auto comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, NULL);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get exe size");
break;
}
char* co = new char[co_size];
if (comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, co);
if (auto comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, co);
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("Failed to get exe data");
break;
}
auto elf_size = CodeObject::ElfSize(co);
hip_status = AddDevProgram(device, co, elf_size, 0);
hip_status = AddDevProgram(device, co, co_size, 0);
if (hip_status != hipSuccess) {
break;
}
// Save the compiled code object
compiled_co[target_id] = std::make_pair(co, elf_size);
} else {
// We found neither a compatible code object nor SPIRV
LogPrintfError(
"No compatible code objects found for: %s, value of HIP_FORCE_SPIRV_CODEOBJECT: %d",
device->devices()[0]->isa().targetId(), HIP_FORCE_SPIRV_CODEOBJECT);
hip_status = hipErrorInvalidValue;
break;
}
}
} while (0);
if (comgr_status != AMD_COMGR_STATUS_SUCCESS) {
LogError("comgr API call failed");
hip_status = hipErrorInvalidValue;
}
// Clean up file and memory resouces if hip_status failed for some reason.
if (hip_status != hipSuccess && hip_status != hipErrorInvalidKernelFile) {
ReleaseImageAndFile();
}
if (data_object.handle) {
if ((comgr_status = amd::Comgr::release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status);
return hipErrorInvalidValue;
}
// release code objects
for (const auto& co : code_obj_map) {
delete[] reinterpret_cast<const char*>(co.second.first);
}
return hip_status;
}
hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devices) {
amd::ScopedLock lock(FatBinaryLock());
bool containGenericTarget = false;
return ExtractFatBinaryUsingCOMGR(devices, containGenericTarget);
}
hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image,
size_t binary_size, size_t binary_offset) {
int devID = device->deviceId();
@@ -569,55 +750,4 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) {
}
return hipSuccess;
}
// ================================================================================================
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
const std::vector<hip::Device*>& devices) {
hipError_t hip_status = hipSuccess;
// At this line, image should be a valid ptr.
guarantee(data != nullptr, "Image cannot be nullptr");
do {
std::vector<std::pair<const void*, size_t>> code_objs;
// Copy device names
std::vector<std::string> device_names;
device_names.reserve(devices.size());
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
}
hip_status =
CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0, device_names, code_objs);
if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) {
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
if (code_objs[dev_idx].first) {
hip_status =
AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
if (hip_status != hipSuccess) {
return hip_status;
}
} else {
// This is the case of hipErrorNoBinaryForGpu which will finally fail app on device
// without code object
LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(),
device_names[dev_idx].c_str());
}
}
} else if (hip_status == hipErrorInvalidKernelFile) {
hip_status = hipSuccess;
// If the image ptr is not clang offload bundle then just directly point the image.
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
hip_status = AddDevProgram(devices[dev_idx], data, CodeObject::ElfSize(data), 0);
if (hip_status != hipSuccess) {
return hip_status;
}
}
} else {
LogPrintfError("CodeObject::extractCodeObjectFromFatBinaryUsingComgr failed with status %d\n",
hip_status);
}
} while (0);
return hip_status;
}
} // namespace hip
+1 -20
Ver fichero
@@ -39,26 +39,7 @@ public:
FatBinaryInfo(const char* fname, const void* image);
~FatBinaryInfo();
// Loads Fat binary from file or image, unbundles COs for devices.
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
bool &containGenericTarget);
/**
* @brief Extract code object from fatbin using comgr unbundling action via calling
* CodeObject::extractCodeObjectFromFatBinaryUsingComgr
*
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
* compressed and even SPIR-V(to be supported later) mode.
* @param[in] devices devices whose code objects will be extracted.
* Returned error code
*
* @return #hipSuccess, #hipErrorNoBinaryForGpu, #hipErrorInvalidValue
*
* @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr()
*/
hipError_t ExtractFatBinaryUsingCOMGR(const void* data,
const std::vector<hip::Device*>& devices);
hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices);
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size,
size_t binary_offset);
hipError_t BuildProgram(const int device_id);