diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp index 69416f10c4..4336fdf0ea 100644 --- a/projects/clr/hipamd/src/hip_code_object.cpp +++ b/projects/clr/hipamd/src/hip_code_object.cpp @@ -35,584 +35,6 @@ namespace hip { hipError_t ihipFree(void* ptr); // forward declaration of methods required for managed variables hipError_t ihipMallocManaged(void** ptr, size_t size, size_t align = 0, bool use_host_ptr = 0); -namespace { -// In uncompressed mode -constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__"; -static constexpr size_t kOffloadBundleUncompressedMagicStrSize = - sizeof(kOffloadBundleUncompressedMagicStr); - -// In compressed mode -constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB"; -static constexpr size_t kOffloadBundleCompressedMagicStrSize = - sizeof(kOffloadBundleCompressedMagicStr); - -constexpr char kOffloadKindHip[] = "hip"; -constexpr char kOffloadKindHipv4[] = "hipv4"; -constexpr char kOffloadKindHcc[] = "hcc"; -constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-"; -constexpr char kHipFatBinName[] = "hipfatbin"; -constexpr char kHipFatBinName_[] = "hipfatbin-"; -constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix -constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-"; - -// Clang Offload bundler description & Header in uncompressed mode. -struct __ClangOffloadBundleInfo { - uint64_t offset; - uint64_t size; - uint64_t bundleEntryIdSize; - const char bundleEntryId[1]; -}; - -struct __ClangOffloadBundleUncompressedHeader { - const char magic[kOffloadBundleUncompressedMagicStrSize - 1]; - uint64_t numOfCodeObjects; - __ClangOffloadBundleInfo desc[1]; -}; - -// Clang Offload bundler description & Header in compressed mode. -struct __ClangOffloadBundleCompressedHeader { - const char magic[kOffloadBundleCompressedMagicStrSize - 1]; - uint16_t versionNumber; - uint16_t compressionMethod; - uint32_t totalSize; - uint32_t uncompressedBinarySize; - uint64_t Hash; - const char compressedBinarydesc[1]; -}; -} // namespace - -bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) { - std::string magic(reinterpret_cast(data), - kOffloadBundleUncompressedMagicStrSize - 1); - if (!magic.compare(kOffloadBundleUncompressedMagicStr)) { - isCompressed = false; - return true; - } - std::string magic1(reinterpret_cast(data), - kOffloadBundleCompressedMagicStrSize - 1); - if (!magic1.compare(kOffloadBundleCompressedMagicStr)) { - isCompressed = true; - return true; - } - return false; -} - -uint32_t CodeObject::getGenericVersion(const void* image) { - const Elf64_Ehdr* ehdr = reinterpret_cast(image); - return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA && - ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ? - ((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) : 0; -} - -bool CodeObject::isGenericTarget(const void* image) { - return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN; -} - -bool CodeObject::containGenericTarget(const void *data) { - const auto obheader = reinterpret_cast(data); - const auto* desc = &obheader->desc[0]; - for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, - desc = reinterpret_cast( - reinterpret_cast(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize)) { - if (desc->size == 0) continue; - const void* image = - reinterpret_cast(reinterpret_cast(obheader) + desc->offset); - if (isGenericTarget(image)) { - return true; - } - } - return false; -} - -uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } - -// Consumes the string 'consume_' from the starting of the given input -// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa-- -// input will become gfx908. -static bool consume(std::string& input, std::string consume_) { - if (input.substr(0, consume_.size()) != consume_) { - return false; - } - input = input.substr(consume_.size()); - return true; -} - -// Trim String till character, will be used to get gpuname -// example: input is gfx908:sram-ecc+ and trim char is : -// input will become :sram-ecc+. -static std::string trimName(std::string& input, char trim) { - auto pos_ = input.find(trim); - auto res = input; - if (pos_ == std::string::npos) { - input = ""; - } else { - res = input.substr(0, pos_); - input = input.substr(pos_); - } - return res; -} - -// Trim String till character, will be used to get bundle entry ID. -// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is . -// input will become amdgcn-amd-amdhsa--gfx1035 -static bool trimNameTail(std::string& input, char trim) { - auto pos_ = input.rfind(trim); - if (pos_ == std::string::npos) { - return false; - } - input = input.substr(0, pos_); - return true; -} - -static char getFeatureValue(std::string& input, std::string feature) { - char res = ' '; - if (consume(input, std::move(feature))) { - res = input[0]; - input = input.substr(1); - } - return res; -} - -static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value, - char& xnack_value) { - processor = trimName(input, ':'); - sramecc_value = getFeatureValue(input, std::string(":sramecc")); - if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false; - xnack_value = getFeatureValue(input, std::string(":xnack")); - if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false; - return true; -} - -static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, - std::string agent_triple_target_id, unsigned int genericVersion) { - // Primitive Check - if (co_triple_target_id == agent_triple_target_id) return true; - - // Parse code object triple target id - if (!consume(co_triple_target_id, std::string(kAmdgcnTargetTriple) + '-')) { - return false; - } - - std::string co_processor; - char co_sram_ecc, co_xnack; - if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { - return false; - } - - if (!co_triple_target_id.empty()) return false; - - // Parse agent isa triple target id - if (!consume(agent_triple_target_id, std::string(kAmdgcnTargetTriple) + '-')) { - return false; - } - - std::string agent_isa_processor; - char isa_sram_ecc, isa_xnack; - if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { - return false; - } - - if (!agent_triple_target_id.empty()) return false; - - // Check for compatibility - if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { - // co_processor is generic target - if (!helpers::IsCompatibleWithGenericTarget(co_processor, agent_isa_processor)) - return false; - } else if (agent_isa_processor != co_processor) { - return false; - } - if (co_sram_ecc != ' ') { - if (co_sram_ecc != isa_sram_ecc) return false; - } - if (co_xnack != ' ') { - if (co_xnack != isa_xnack) return false; - } - return true; -} - -bool CodeObject::QueryGenericTarget(std::string agentTarget, std::string& processor, - char& sram_ecc, char& xnack) { - static const std::string head = std::string(kAmdgcnTargetTriple) + '-'; - // Parse agent isa triple target id - if (!consume(agentTarget, head)) { - return false; - } - if (!getTargetIDValue(agentTarget, processor, sram_ecc, xnack)) { - return false; - } - if (processor.empty()) return false; - auto &map = helpers::GenericTargetMapping(); - auto search = map.find(processor); - if (search == map.end()) return false; - processor = head + search->second; - return true; -} - -// ================================================================================================ -size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) { - if (isCompressed) { - const auto obheader = reinterpret_cast(data); - return obheader->totalSize; - } else { - const auto obheader = reinterpret_cast(data); - const __ClangOffloadBundleInfo* desc = &obheader->desc[0]; - uint64_t i = 0; - while (++i < obheader->numOfCodeObjects) { - desc = reinterpret_cast( - reinterpret_cast(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize); - } - return desc->offset + desc->size; - } -} - -// ================================================================================================ -hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( - const void* data, size_t size, const std::vector& agent_triple_target_ids, - std::vector>& code_objs) { - hipError_t hipStatus = hipSuccess; - amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS; - - const size_t num_devices = agent_triple_target_ids.size(); - size_t num_code_objs = num_devices; - bool isCompressed = false; - if (!IsClangOffloadMagicBundle(data, isCompressed)) { - ClPrint(amd::LOG_INFO, amd::LOG_COMGR, "IsClangOffloadMagicBundle(%p) return false", data); - // hipModuleLoadData() will possibly call here - return hipErrorInvalidKernelFile; - } - - if (size == 0) size = getFatbinSize(data, isCompressed); - - amd_comgr_data_t dataCodeObj{0}; - amd_comgr_data_set_t dataSetBundled{0}; - amd_comgr_data_set_t dataSetUnbundled{0}; - amd_comgr_action_info_t actionInfoUnbundle{0}; - amd_comgr_data_t item{0}; - - - std::set devicesSet{}; // To make sure device is unique - std::set genericDevicesSet{}; // Used to record generic targets - - std::vector bundleEntryIDs{}; - static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix - for (size_t i = 0; i < num_devices; i++) { - auto res = devicesSet.insert(hipv4 + agent_triple_target_ids[i]); - if (res.second) { - // This is a new device in devicesSet - bundleEntryIDs.push_back(res.first->c_str()); - std::string processor; - char sram_ecc = ' ', xnack = ' '; - if (!QueryGenericTarget(agent_triple_target_ids[i], processor, sram_ecc, xnack)) { - continue; // No generic target for this device - } - // Now processor is generic such as - // amdgcn-amd-amdhsa--gfx9-4-generic, amdgcn-amd-amdhsa--gfx11-generic - processor = hipv4 + processor; - auto ret = genericDevicesSet.insert(processor); - if (ret.second) { - // Without feature - bundleEntryIDs.push_back(ret.first->c_str()); - } - if (xnack != ' ') { - ret = genericDevicesSet.insert(processor + ":xnack" + xnack); - if (ret.second) { - // Generic target with xnack feature - bundleEntryIDs.push_back(ret.first->c_str()); - } - } - if (sram_ecc != ' ') { - processor += ":sramecc"; - processor += sram_ecc; - ret = genericDevicesSet.insert(processor); - if (ret.second) { - // Generic target with sramecc feature - bundleEntryIDs.push_back(ret.first->c_str()); - } - if (xnack != ' ') { - ret = genericDevicesSet.insert(processor + ":xnack" + xnack); - if (ret.second) { - // Generic target with sramecc and xnack features - bundleEntryIDs.push_back(ret.first->c_str()); - } - } - } - } - } - - do { - // Create Bundled dataset - comgrStatus = amd::Comgr::create_data_set(&dataSetBundled); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - // CodeObject - comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError( - "amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast(data)); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data, - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError( - "amd::Comgr::set_data_name(" - ") failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - // Set up ActionInfo - comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids( - actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size()); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError( - "amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh", - bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - // Unbundle - comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle, - dataSetBundled, dataSetUnbundled); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - // Check CodeObject count - size_t count = 0; - comgrStatus = - amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - // Initialize Code objects - code_objs.reserve(num_code_objs); - for (size_t i = 0; i < num_code_objs; i++) { - code_objs.push_back(std::make_pair(nullptr, 0)); - } - - for (size_t i = 0; i < count; i++) { - if (num_code_objs == 0) break; - - size_t itemSize = 0; - comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled, - AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count, - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - - comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count, - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - std::string bundleEntryId(itemSize, 0); - comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data()); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count, - itemSize, comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "Found bundleEntryId=%s", bundleEntryId.c_str()); - - // Remove bundleEntryId_ - if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) { - // This is behavour in comgr unbundling which is subject to change. - // So just give info. - ClPrint(amd::LOG_INFO, amd::LOG_COMGR, - "bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(), - kOffloadHipV4FatBinName_); - } - trimNameTail(bundleEntryId, '.'); // Remove .fileExtention - - // Currently we only support EF_AMDGPU_GENERIC_VERSION_MIN on generic target - uint32_t genericVersion = - bundleEntryId.find("generic") != bundleEntryId.npos ? EF_AMDGPU_GENERIC_VERSION_MIN : 0; - char* itemData = nullptr; - for (size_t dev = 0; dev < num_devices; ++dev) { - if (code_objs[dev].first != nullptr) { - if (!isGenericTarget(code_objs[dev].first)) { - continue; // Specific target already found - } else if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { - continue; // Generic target already found, no need to check another generic - } - } - ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", - dev, agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str()); - if (isCodeObjectCompatibleWithDevice(bundleEntryId, agent_triple_target_ids[dev], - genericVersion)) { - if (itemData == nullptr) { - itemSize = 0; - comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count, - comgrStatus); - hipStatus = hipErrorInvalidValue; - break; - } - if (itemSize == 0) { - // If there isn't a code object for this device, - // amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with - // valid name but no data. We need continue searching for other devices - ClPrint(amd::LOG_INFO, amd::LOG_COMGR, - "amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev, - agent_triple_target_ids[dev].c_str()); - break; - } - // itemData should be deleted in fatbin's destructor - itemData = new char[itemSize]; - if (itemData == nullptr) { - LogError("no enough memory"); - hipStatus = hipErrorOutOfMemory; - break; - } - comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count, - itemSize, comgrStatus); - hipStatus = hipErrorInvalidValue; - delete[] itemData; - itemData = nullptr; - break; - } - } - if (code_objs[dev].first != nullptr) { - // This must be data of generic target - bool used = false; // Still used by other devices? - for (size_t i = 0; i < num_devices; ++i) { - if (dev != i && code_objs[dev].first == code_objs[i].first) { - used = true; - break; - } - } - if (!used) { - delete[] reinterpret_cast(code_objs[dev].first); - } - } else { - --num_code_objs; - } - code_objs[dev] = std::make_pair(reinterpret_cast(itemData), itemSize); - ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, - "Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s, %s), " - "Size=%zu, num_code_objs=%zu", - dev, agent_triple_target_ids[dev].c_str(), itemData, - isCompressed ? "compressed" : "uncompressed", - genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN ? "generic" : "non-generic", - itemSize, num_code_objs); - } - } - - comgrStatus = amd::Comgr::release_data(item); - item.handle = 0; - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - } - if (hipStatus != hipSuccess) break; - } - } while (0); - - // Cleanup - if (actionInfoUnbundle.handle) { - comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - } - } - if (dataSetBundled.handle) { - comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - } - } - - if (dataSetUnbundled.handle) { - comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh", - comgrStatus); - hipStatus = hipErrorInvalidValue; - } - } - - if (dataCodeObj.handle) { - comgrStatus = amd::Comgr::release_data(dataCodeObj); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - } - } - - if (item.handle) { - comgrStatus = amd::Comgr::release_data(item); - if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus); - hipStatus = hipErrorInvalidValue; - } - } - - return hipStatus; -} hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { amd::ScopedLock lock(dclock_); @@ -620,7 +42,7 @@ hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { // Number of devices = 1 in dynamic code object fb_info_ = new FatBinaryInfo(fname, image); std::vector devices = {g_devices[ihipGetDevice()]}; - IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices)); + IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinaryUsingCOMGR(devices)); // No Lazy loading for DynCO IHIP_RETURN_ONFAIL(fb_info_->BuildProgram(ihipGetDevice())); @@ -834,7 +256,7 @@ hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) { // Create a new fat binary object and extract the fat binary for all devices. FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo(nullptr, data); - hipError_t err = fatBinaryInfo->ExtractFatBinary(g_devices); + hipError_t err = fatBinaryInfo->ExtractFatBinaryUsingCOMGR(g_devices); programs = fatBinaryInfo; return err; } diff --git a/projects/clr/hipamd/src/hip_code_object.hpp b/projects/clr/hipamd/src/hip_code_object.hpp index 83ae9502e0..dafa2d2029 100644 --- a/projects/clr/hipamd/src/hip_code_object.hpp +++ b/projects/clr/hipamd/src/hip_code_object.hpp @@ -35,6 +35,52 @@ THE SOFTWARE. #include "platform/program.hpp" namespace hip { +namespace symbols { +// In uncompressed mode +constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__"; +static constexpr size_t kOffloadBundleUncompressedMagicStrSize = + sizeof(kOffloadBundleUncompressedMagicStr); + +// In compressed mode +constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB"; +static constexpr size_t kOffloadBundleCompressedMagicStrSize = + sizeof(kOffloadBundleCompressedMagicStr); + +constexpr char kOffloadKindHip[] = "hip"; +constexpr char kOffloadKindHipv4[] = "hipv4"; +constexpr char kOffloadKindHcc[] = "hcc"; +constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-"; +constexpr char kHipFatBinName[] = "hipfatbin"; +constexpr char kHipFatBinName_[] = "hipfatbin-"; +constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix +constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-"; + +// Clang Offload bundler description & Header in uncompressed mode. +struct ClangOffloadBundleInfo { + uint64_t offset; + uint64_t size; + uint64_t bundleEntryIdSize; + const char bundleEntryId[1]; +}; + +struct ClangOffloadBundleUncompressedHeader { + const char magic[kOffloadBundleUncompressedMagicStrSize - 1]; + uint64_t numOfCodeObjects; + ClangOffloadBundleInfo desc[1]; +}; + +// Clang Offload bundler description & Header in compressed mode. +struct ClangOffloadBundleCompressedHeader { + const char magic[kOffloadBundleCompressedMagicStrSize - 1]; + uint16_t versionNumber; + uint16_t compressionMethod; + uint32_t totalSize; + uint32_t uncompressedBinarySize; + uint64_t Hash; + const char compressedBinarydesc[1]; +}; +} // namespace symbols + //Forward Declaration for friend usage class PlatformState; @@ -43,50 +89,6 @@ class CodeObject { public: virtual ~CodeObject() {} - // Functions to add_dev_prog and build - static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr, - size_t binary_size); - static hipError_t build_module(hipModule_t hmod, const std::vector& devices); - - static uint64_t ElfSize(const void* emi); - - static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed); - - static uint32_t getGenericVersion(const void* image); - - static bool isGenericTarget(const void* image); - - static bool containGenericTarget(const void *data); - - // Return size of fat bin - static size_t getFatbinSize(const void* data, const bool isCompressed = false); - - /** - * @brief Extract code object from fatbin using comgr unbundling action - * - * @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed, - * compressed and even SPIR-V(to be supported later) mode. - * @param[in] size the size of the bundle data - * @param[in] agent_triple_target_ids isa names of concerned devices - * @param[out] code_objs the buffer address and size pairs of extracted code objects of - * concerned devices - * Returned error code - * - * @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue, - * #hipErrorNoBinaryForGpu - * - * @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data, - * const std::vector& devices) - */ - static hipError_t extractCodeObjectFromFatBinaryUsingComgr( - const void* data, size_t size, const std::vector& devices, - std::vector>& code_objs); - - // Query the generic target of agent target. - // Return true on successfull query, false on failure - static bool QueryGenericTarget(std::string agentTarget, std::string& processor, char& sram_ecc, - char& xnack); - protected: CodeObject() {} @@ -185,6 +187,6 @@ private: std::unordered_map managedVarsDevicePtrInitalized_; }; -}; // namespace hip +}; // namespace hip #endif /* HIP_CODE_OBJECT_HPP */ diff --git a/projects/clr/hipamd/src/hip_comgr_helper.cpp b/projects/clr/hipamd/src/hip_comgr_helper.cpp index 561d332f1a..66f1d70cc0 100644 --- a/projects/clr/hipamd/src/hip_comgr_helper.cpp +++ b/projects/clr/hipamd/src/hip_comgr_helper.cpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #define LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING 1 #include "llvm/BinaryFormat/ELF.h" + #if defined(_WIN32) #include #if defined(__has_attribute) @@ -32,7 +33,6 @@ THE SOFTWARE. using namespace llvm::ELF; namespace hip { - std::unordered_set LinkProgram::linker_set_; namespace helpers { @@ -64,239 +64,6 @@ struct __ClangOffloadBundleHeader { __ClangOffloadBundleInfo desc[1]; }; -static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, - bool& sramEccSupported) { - switch (EFlags & EF_AMDGPU_MACH) { - case EF_AMDGPU_MACH_AMDGCN_GFX700: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx700"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX701: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx701"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX702: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx702"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX703: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx703"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX704: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx704"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX705: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx705"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX801: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx801"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX802: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx802"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX803: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx803"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX805: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx805"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX810: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx810"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX900: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx900"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX902: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx902"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX904: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx904"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX906: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx906"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX908: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx908"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX909: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx909"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX90A: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx90a"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX90C: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx90c"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX942: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx942"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX950: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx950"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1010: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1010"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1011: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1011"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1012: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1012"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1013: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1013"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1030: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1030"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1031: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1031"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1032: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1032"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1033: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1033"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1034: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1034"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1035: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1035"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1036: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1036"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1100: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1100"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1101: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1101"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1102: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1102"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1103: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1103"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1150: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1150"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1151: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1151"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1200: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1200"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1201: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1201"; - case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx9-generic"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx10-1-generic"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx10-3-generic"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx11-generic"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx12-generic"; - break; - default: - return false; - } - return true; -} - // Consumes the string 'consume_' from the starting of the given input // eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa-- // input will become gfx908. diff --git a/projects/clr/hipamd/src/hip_fatbin.cpp b/projects/clr/hipamd/src/hip_fatbin.cpp index 82e739abe6..051a6bba0c 100644 --- a/projects/clr/hipamd/src/hip_fatbin.cpp +++ b/projects/clr/hipamd/src/hip_fatbin.cpp @@ -20,7 +20,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "hip/hip_runtime_api.h" +#include "llvm/BinaryFormat/ELF.h" + #include "hip_fatbin.hpp" +#include "hip_global.hpp" #include #include "hip_code_object.hpp" @@ -28,7 +32,6 @@ THE SOFTWARE. #include "comgrctx.hpp" namespace hip { - namespace comgr_helper { template class ComgrUniqueHandle { @@ -154,30 +157,312 @@ void ListAllDeviceWithNoCOFromBundle( } } -hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector& devices, - bool &containGenericTarget) { - amd_comgr_data_t data_object {0}; - amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS; - hipError_t hip_status = hipSuccess; +static std::string TargetGenericMap(const std::string& input) { + const static std::unordered_map target_map{ + // clang-format off + {"amdgcn-amd-amdhsa--gfx900" , "amdgcn-amd-amdhsa--gfx9-generic" }, + {"amdgcn-amd-amdhsa--gfx902" , "amdgcn-amd-amdhsa--gfx9-generic" }, + {"amdgcn-amd-amdhsa--gfx904" , "amdgcn-amd-amdhsa--gfx9-generic" }, + {"amdgcn-amd-amdhsa--gfx906" , "amdgcn-amd-amdhsa--gfx9-generic" }, + {"amdgcn-amd-amdhsa--gfx909" , "amdgcn-amd-amdhsa--gfx9-generic" }, + {"amdgcn-amd-amdhsa--gfx90c" , "amdgcn-amd-amdhsa--gfx9-generic" }, + {"amdgcn-amd-amdhsa--gfx942" , "amdgcn-amd-amdhsa--gfx9-4-generic" }, + {"amdgcn-amd-amdhsa--gfx950" , "amdgcn-amd-amdhsa--gfx9-4-generic" }, + {"amdgcn-amd-amdhsa--gfx1010", "amdgcn-amd-amdhsa--gfx10-1-generic"}, + {"amdgcn-amd-amdhsa--gfx1011", "amdgcn-amd-amdhsa--gfx10-1-generic"}, + {"amdgcn-amd-amdhsa--gfx1012", "amdgcn-amd-amdhsa--gfx10-1-generic"}, + {"amdgcn-amd-amdhsa--gfx1013", "amdgcn-amd-amdhsa--gfx10-1-generic"}, + {"amdgcn-amd-amdhsa--gfx1030", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1031", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1032", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1033", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1034", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1035", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1036", "amdgcn-amd-amdhsa--gfx10-3-generic"}, + {"amdgcn-amd-amdhsa--gfx1100", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1101", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1102", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1103", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1150", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1151", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1152", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1153", "amdgcn-amd-amdhsa--gfx11-generic" }, + {"amdgcn-amd-amdhsa--gfx1200", "amdgcn-amd-amdhsa--gfx12-generic" }, + {"amdgcn-amd-amdhsa--gfx1201", "amdgcn-amd-amdhsa--gfx12-generic" }, + // clang-format on + }; + if (auto i = target_map.find(input); i != target_map.end()) { + return i->second; + } + return {}; +} - // If image was passed as a pointer to our hipMod* api, we can try to extract the file name - // if it was mapped by the app. Otherwise use the COMGR data API. - if (fname_.size() == 0) { - if (image_ == nullptr) { - LogError("Both Filename and image cannot be null"); - return hipErrorInvalidValue; +// For sramecc and xnack +static std::string TargetFeatureCheck(const std::string& input, std::string feature) { + if (input.find(feature) != std::string::npos) { + auto feature_p = feature + "+"; // feature present eg: xnack+ + auto feature_m = feature + "-"; // feature absent eg: xnack- + if (input.find(feature_p) != std::string::npos) { + return feature_p; + } else if (input.find(feature_m) != std::string::npos) { + return feature_m; + } + } + return ""; +} + +static std::string TargetToGeneric(std::string input) { + auto sramecc = TargetFeatureCheck(input, "sramecc"); + auto xnack = TargetFeatureCheck(input, "xnack"); + + // Remove all features + size_t index = input.find_first_of(":"); + std::string name_without_feature = input.substr(0, index); + + // Look up generic name + auto generic_name = TargetGenericMap(name_without_feature); + if (generic_name.empty()) { + return generic_name; // No generic exists + } + + // reappend feature + if (!sramecc.empty()) { + generic_name += ":"; + generic_name += sramecc; + } + if (!xnack.empty()) { + generic_name += ":"; + generic_name += xnack; + } + return generic_name; +} + +static bool IsCodeObjectUncompressed(const void* image) { + return std::memcmp(image, + reinterpret_cast(symbols::kOffloadBundleUncompressedMagicStr), + sizeof(symbols::kOffloadBundleUncompressedMagicStr) - 1) == 0; +} + +static bool IsCodeObjectCompressed(const void* image) { + return std::memcmp(image, + reinterpret_cast(symbols::kOffloadBundleCompressedMagicStr), + sizeof(symbols::kOffloadBundleCompressedMagicStr) - 1) == 0; +} + +static bool IsCodeObjectElf(const void* image) { + const amd::Elf64_Ehdr* ehdr = reinterpret_cast(image); + return ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == llvm::ELF::ELFOSABI_AMDGPU_HSA; +} + +static bool UncompressAndPopulateCodeObject( + const void* image, const std::set& unique_isa_names, + std::map>& code_obj_map) { + auto remove_file_extension = [](const std::string& input) -> std::string { + size_t index = input.find_last_of("."); + std::string ret = input.substr(0, index); + return ret; + }; + + std::vector bundle_ids_str; + std::set unique_ids; + + for (const auto& isa_name : unique_isa_names) { + bundle_ids_str.push_back(std::string(symbols::kOffloadKindHipv4_) + isa_name); + } + + std::vector bundle_ids; + bundle_ids.reserve(bundle_ids_str.size()); + for (auto& bundle_id_str : bundle_ids_str) { + bundle_ids.push_back(bundle_id_str.c_str()); + } + + const auto obheader = + reinterpret_cast(image); + const size_t size = obheader->totalSize; + + bool passed = false; + do { + comgr_helper::ComgrDataSetUniqueHandle bundled_co, unbundled_co; + comgr_helper::ComgrDataUniqueHandle input_bundle; + if (auto comgr_status = bundled_co.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in creating bundled_co"); + break; } + if (auto comgr_status = unbundled_co.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in creating unbundled_co"); + break; + } + + if (auto comgr_status = input_bundle.Create(AMD_COMGR_DATA_KIND_OBJ_BUNDLE); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in creating input bundle"); + break; + } + + if (auto comgr_status = + amd::Comgr::set_data(input_bundle.get(), size, static_cast(image)); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in setting image data to bundle"); + break; + } + + if (auto comgr_status = amd::Comgr::set_data_name(input_bundle.get(), symbols::kHipFatBinName); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in setting image data to bundle"); + break; + } + + if (auto comgr_status = amd::Comgr::data_set_add(bundled_co.get(), input_bundle.get()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in adding data set"); + break; + } + + comgr_helper::ComgrActionInfoUniqueHandle unbundle_action; + if (auto comgr_status = unbundle_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in creating unbundle action"); + break; + } + + if (auto comgr_status = amd::Comgr::action_info_set_bundle_entry_ids( + unbundle_action.get(), bundle_ids.data(), bundle_ids.size()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Error in setting bundle entry ids"); + break; + } + + if (auto comgr_status = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, unbundle_action.get(), + bundled_co.get(), unbundled_co.get()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to unbundle code object"); + break; + } + + size_t count = 0; + if (auto comgr_status = amd::Comgr::action_data_count(unbundled_co.get(), + AMD_COMGR_DATA_KIND_EXECUTABLE, &count); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to get data count of unbundled code object"); + break; + } + + for (size_t i = 0; i < count; i++) { + amd_comgr_data_t item; + if (auto comgr_status = amd::Comgr::action_data_get_data( + unbundled_co.get(), AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to get data unbundled code object"); + break; + } + + size_t item_name_size = 0; + if (auto comgr_status = amd::Comgr::get_data_name(item, &item_name_size, nullptr); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to get data size"); + break; + } + + std::string item_bundle_id(item_name_size, 0); + if (auto comgr_status = + amd::Comgr::get_data_name(item, &item_name_size, item_bundle_id.data()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to get data"); + break; + } + + size_t item_size = 0; + if (auto comgr_status = amd::Comgr::get_data(item, &item_size, nullptr); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to get data size"); + break; + } + + if (item_size > 0) { + char* item_data = new char[item_size]; + if (auto comgr_status = amd::Comgr::get_data(item, &item_size, item_data); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to get data"); + break; + } + + std::string bundle_entry = remove_file_extension( + std::string(item_bundle_id.c_str() + sizeof(symbols::kOffloadHipV4FatBinName_) - 1)); + LogPrintfInfo("Inserting bundle entry of %s : size: %d, data: %p", bundle_entry.c_str(), + item_size, item_data); + code_obj_map[bundle_entry] = std::make_pair(item_data, item_size); + } + } + passed = true; + } while (0); + + return passed; +} + +static bool PopulateCodeObjectMap( + const void* image, const std::set& unique_isa_names, + std::map>& code_obj_map) { + bool passed = false; + do { + comgr_helper::ComgrDataUniqueHandle data_object; + if (auto comgr_status = data_object.Create(AMD_COMGR_DATA_KIND_FATBIN); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Creating data object failed with status %d ", comgr_status); + break; + } + + // There is no way to find size of offload bundle, so we pass 4096 here. + if (auto comgr_status = + amd::Comgr::set_data(data_object.get(), 4096, reinterpret_cast(image)); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); + break; + } + + // Create a query list using COMGR info for unique ISAs. + std::vector query_list_array; + query_list_array.reserve(unique_isa_names.size()); + for (const auto& isa_name : unique_isa_names) { + auto& item = query_list_array.emplace_back(); + item.isa = isa_name.c_str(); + item.size = 0; + item.offset = 0; + } + + // Look up the code object info passing the query list. + if (auto comgr_status = amd::Comgr::lookup_code_object( + data_object.get(), query_list_array.data(), unique_isa_names.size()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); + break; + } + + for (const auto& item : query_list_array) { + if (item.size > 0) { + char* d = new char[item.size]; + std::memcpy(reinterpret_cast(d), reinterpret_cast(image) + item.offset, + item.size); + code_obj_map[item.isa] = std::make_pair(d, item.size); + } + } + + passed = true; + } while (0); + return passed; +} + +hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector& devices) { + if (fname_.empty() && image_ == nullptr) { + LogError("Both Filename and image cannot be null"); + return hipErrorInvalidValue; + } + + if (image_ != nullptr) { if (!amd::Os::FindFileNameFromAddress(image_, &fname_, &foffset_)) { fname_ = std::string(""); foffset_ = 0; } - } - - // If file name & path are available (or it is passed to you), then get the file desc to use - // COMGR file slice APIs. - if (image_ == nullptr && fname_.size() > 0) { - // Get File Handle & size of the file. + } else { ufd_ = PlatformState::instance().GetUniqueFileHandle(fname_.c_str()); if (ufd_ == nullptr) { return hipErrorFileNotFound; @@ -197,264 +482,183 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector unique_isa_names; + const std::string spirv_isa_name{"spirv64-amd-amdhsa--amdgcnspirv"}; + unique_isa_names.insert(spirv_isa_name); // Insert SPIRV ISA name + for (auto device : devices) { + std::string device_name = device->devices()[0]->isa().isaName(); + unique_isa_names.insert(device_name); + auto generic_name = TargetToGeneric(device_name); + LogPrintfInfo("Looking up generic name of : %s - %s", device_name.c_str(), + generic_name.c_str()); + if (!generic_name.empty()) { + unique_isa_names.insert(generic_name); + } + } + + std::map> code_obj_map; //!< code object map + if (is_compressed) { + if (!UncompressAndPopulateCodeObject(image_, unique_isa_names, code_obj_map)) { + return hipErrorInvalidImage; + } + } else { // uncompressed code object + if (!PopulateCodeObjectMap(image_, unique_isa_names, code_obj_map)) { + return hipErrorInvalidImage; + } + } + + hipError_t hip_status = hipErrorInvalidImage; do { - bool isCompressed = false; - // If the image ptr is not clang offload bundle then just directly point the image. - if (!CodeObject::IsClangOffloadMagicBundle(image_, isCompressed)) { - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - uint64_t elf_size = CodeObject::ElfSize(image_); - if (elf_size == 0) { - hip_status = hipErrorInvalidImage; - break; - } - hip_status = AddDevProgram(devices[dev_idx], image_, elf_size, 0); + bool spirv_isa_found = code_obj_map.find(spirv_isa_name) != code_obj_map.end(); + for (auto device : devices) { + std::string device_name = device->devices()[0]->isa().isaName(); + auto generic_target_name = TargetToGeneric(device_name); // Generic Code Object + auto native_co = code_obj_map.find(device_name); // Native Code Object + auto generic_co = code_obj_map.find(generic_target_name); // generic Code Object + LogPrintfInfo("Device name: %s Generic name: %s", device_name.c_str(), + generic_target_name.c_str()); + + // If the size is not 0, that means we found the native isa code object + if (native_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) { + LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId()); + + // We need to do this because there is existing mechanism which deletes code object in + // destructor. Ideally next set of refactor should sort it. + char* co = new char[native_co->second.second]; + std::memcpy(co, reinterpret_cast(native_co->second.first), + native_co->second.second); + hip_status = AddDevProgram(device, co, native_co->second.second, 0); if (hip_status != hipSuccess) { break; } - } - break; - } - if (!isCompressed) { - if (CodeObject::containGenericTarget(image_)) { - LogInfo("offload bundle contains generic target code object"); - containGenericTarget = true; - } - } - if (isCompressed || containGenericTarget) { - size_t major = 0, minor = 0; - amd::Comgr::get_version(&major, &minor); - if ((major == 2 && minor >= 8) || major > 2) { - hip_status = ExtractFatBinaryUsingCOMGR(image_, devices); - break; - } else if (isCompressed) { - LogPrintfError("comgr %zu.%zu cannot support compressed mode which requires comgr 2.8+", - major, minor); - hip_status = hipErrorNotSupported; - break; - } - } - // Create a data object, if it fails return error - if ((comgr_status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)) != - AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("Creating data object failed with status %d ", comgr_status); - hip_status = hipErrorInvalidValue; - break; - } - - if (ufd_ != nullptr && amd::Os::isValidFileDesc(ufd_->fdesc_)) { - LogPrintfError("Have valid file!%d", ufd_->fdesc_); - } - - if (image_ != nullptr) { - // Using the image ptr, map the data object. - if ((comgr_status = - amd::Comgr::set_data(data_object, 4096, reinterpret_cast(image_))) != - AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); - hip_status = hipErrorInvalidValue; - break; - } - } else { - guarantee(false, "Cannot have both fname_ and image_ as nullptr"); - } - - // Find the unique number of ISAs needed for this COMGR query. - std::unordered_map> unique_isa_names; - for (auto device : devices) { - std::string device_name = device->devices()[0]->isa().isaName(); - unique_isa_names.insert({device_name, std::make_pair(0, 0)}); - } - - // there are two spirv targets, spirv64-amd-amdhsa--amdgcnspirv and - // spirv64-amd-amdhsa-unknown-amdgcnspirv. - // eventually we will remove spirv64-amd-amdhsa--amdgcnspirv - const std::vector spirv_isa_names = {"spirv64-amd-amdhsa--amdgcnspirv", - "spirv64-amd-amdhsa-unknown-amdgcnspirv"}; - for (const auto& spirv_isa_name : spirv_isa_names) { - unique_isa_names.insert({spirv_isa_name, std::make_pair(0, 0)}); - } - - // Create a query list using COMGR info for unique ISAs. - std::vector query_list_array; - query_list_array.reserve(unique_isa_names.size()); - for (const auto& isa_name : unique_isa_names) { - auto& item = query_list_array.emplace_back(); - item.isa = isa_name.first.c_str(); - item.size = 0; - item.offset = 0; - } - - // Look up the code object info passing the query list. - if ((comgr_status = amd::Comgr::lookup_code_object(data_object, query_list_array.data(), - unique_isa_names.size())) != - AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); - hip_status = hipErrorInvalidValue; - break; - } - - for (const auto& item : query_list_array) { - auto unique_it = unique_isa_names.find(item.isa); - guarantee(unique_isa_names.cend() != unique_it, "Cannot find unique isa "); - unique_it->second = std::pair(static_cast(item.size), - static_cast(item.offset)); - } - - bool spirv_isa_found = false; - decltype(unique_isa_names.begin()) spirv_isa_handle; - for (const auto& spirv_isa_name : spirv_isa_names) { - auto iter = unique_isa_names.find(spirv_isa_name); - if (iter->second.first != 0) { - spirv_isa_found = true; - spirv_isa_handle = iter; - } - } - bool get_spirv_data_res = false; - std::once_flag get_spirv_data_flag; - std::unordered_map> compiled_co; // code object cache - comgr_helper::ComgrDataSetUniqueHandle spirv_data_set; - comgr_helper::ComgrDataUniqueHandle spirv_data; - - auto get_spirv_data = [&]() { - if (comgr_status = spirv_data_set.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to create SPIRV Data set"); - return; - } - - if (comgr_status = spirv_data.Create(AMD_COMGR_DATA_KIND_SPIRV); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to create SPIRV Data"); - return; - } - - if (comgr_status = - amd::Comgr::set_data(spirv_data.get(), spirv_isa_handle->second.first /* size */, - reinterpret_cast(const_cast(image_)) + - spirv_isa_handle->second.second /* buffer */); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to assign data in comgr"); - return; - } - - if (comgr_status = amd::Comgr::set_data_name(spirv_data.get(), "hip_code_object.spv"); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to set data name"); - return; - } - - if (comgr_status = amd::Comgr::data_set_add(spirv_data_set.get(), spirv_data.get()); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to add spir data"); - return; - } - - get_spirv_data_res = true; - }; - - LogPrintfInfo("Searching for code objects, HIP_FORCE_SPIRV_CODEOBJECT: %d", - HIP_FORCE_SPIRV_CODEOBJECT); - - for (auto device : devices) { - std::string device_name = device->devices()[0]->isa().isaName(); - auto dev_it = unique_isa_names.find(device_name); - // If the size is not 0, that means we found the native isa code object - if (dev_it->second.first != 0 && !HIP_FORCE_SPIRV_CODEOBJECT) { - LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId()); - guarantee(unique_isa_names.cend() != dev_it, - "Cannot find the device name in the unique device name"); - hip_status = AddDevProgram( - device, reinterpret_cast
(const_cast(image_)) + dev_it->second.second, - dev_it->second.first, dev_it->second.second); + } else if (generic_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) { + LogPrintfInfo("Using Generic code object: %s : %s", device->devices()[0]->isa().targetId(), + generic_target_name.c_str()); + char* co = new char[generic_co->second.second]; + std::memcpy(co, reinterpret_cast(generic_co->second.first), + generic_co->second.second); + hip_status = AddDevProgram(device, co, generic_co->second.second, 0); if (hip_status != hipSuccess) { break; } } else if (spirv_isa_found) { - std::call_once(get_spirv_data_flag, get_spirv_data); - - if(!get_spirv_data_res) { - hip_status = hipErrorInvalidValue; - break; - } std::string target_id = device->devices()[0]->isa().targetId(); - if (auto code_iter = compiled_co.find(target_id); code_iter != compiled_co.end()) { - // We have already compiled for it, lets reuse the code object - char* co = new char[code_iter->second.second]; - std::memcpy(co, code_iter->second.first, code_iter->second.second); - LogPrintfInfo("reusing code object for: %s", target_id.c_str()); - hip_status = AddDevProgram(device, co, code_iter->second.second, 0); - if (hip_status != hipSuccess) { - break; - } - continue; - } - - LogPrintfInfo("Creating ISA for: %s from spirv", target_id.c_str()); - comgr_helper::ComgrActionInfoUniqueHandle reloc_action; std::string isa = "amdgcn-amd-amdhsa--" + target_id; - if (comgr_status = reloc_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to create action"); - break; - } - - if (comgr_status = amd::Comgr::action_info_set_isa_name(reloc_action.get(), isa.c_str()); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to set ISA name"); - break; - } - - if (comgr_status = amd::Comgr::action_info_set_device_lib_linking(reloc_action.get(), true); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to set device lib linking"); - break; - } - - if (comgr_status = amd::Comgr::action_info_set_option_list( - reloc_action.get(), nullptr /* options list */, 0 /* options size */); - comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to set option list"); - break; - } + LogPrintfInfo("Creating ISA for: %s from spirv", target_id.c_str()); + comgr_helper::ComgrDataSetUniqueHandle spirv_data_set; comgr_helper::ComgrDataSetUniqueHandle reloc_data; - if (comgr_status = reloc_data.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to create reloc data set"); + comgr_helper::ComgrDataUniqueHandle spirv_data; + comgr_helper::ComgrActionInfoUniqueHandle reloc_action; + + if (auto comgr_status = spirv_data_set.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to create SPIRV Data set"); break; } - if (comgr_status = + if (auto comgr_status = spirv_data.Create(AMD_COMGR_DATA_KIND_SPIRV); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to create SPIRV Data"); + break; + } + + auto spirv_isa_handle = code_obj_map.find(spirv_isa_name); + if (auto comgr_status = + amd::Comgr::set_data(spirv_data.get(), spirv_isa_handle->second.second /* size */, + reinterpret_cast(spirv_isa_handle->second.first) + /* buffer */); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to assign SPIRV data"); + break; + } + + if (auto comgr_status = amd::Comgr::set_data_name(spirv_data.get(), "hip_code_object.spv"); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to set spirv data's name"); + break; + } + + if (auto comgr_status = amd::Comgr::data_set_add(spirv_data_set.get(), spirv_data.get()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to add spir data to data set"); + break; + } + + if (auto comgr_status = reloc_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to create reloc action"); + break; + } + + if (auto comgr_status = + amd::Comgr::action_info_set_isa_name(reloc_action.get(), isa.c_str()); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to set reloc action's isa name"); + break; + } + + if (auto comgr_status = reloc_data.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to create reloc data"); + break; + } + + if (auto comgr_status = + amd::Comgr::action_info_set_device_lib_linking(reloc_action.get(), true); + comgr_status != AMD_COMGR_STATUS_SUCCESS) { + LogError("Failed to set device lib linking for reloc action"); + break; + } + + if (auto comgr_status = amd::Comgr::do_action(AMD_COMGR_ACTION_COMPILE_SPIRV_TO_RELOCATABLE, reloc_action.get(), spirv_data_set.get(), reloc_data.get()); comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to compile to reloc"); - LogError("Failed to do action: codegen bc ot reloc"); + LogError("Failed to compile spirv to reloc"); break; } comgr_helper::ComgrActionInfoUniqueHandle exe_action; comgr_helper::ComgrDataSetUniqueHandle exe_output; - - if (comgr_status = exe_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("Failed to create action"); + if (auto comgr_status = exe_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { LogError("Failed to create exe action"); break; } - if (comgr_status = amd::Comgr::action_info_set_isa_name(exe_action.get(), isa.c_str()); + if (auto comgr_status = amd::Comgr::action_info_set_isa_name(exe_action.get(), isa.c_str()); comgr_status != AMD_COMGR_STATUS_SUCCESS) { LogError("Failed to set exe action isa name"); + break; } - if (comgr_status = exe_output.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { + if (auto comgr_status = exe_output.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) { LogError("Failed to create exe output"); break; } - if (comgr_status = + if (auto comgr_status = amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE, exe_action.get(), reloc_data.get(), exe_output.get()); comgr_status != AMD_COMGR_STATUS_SUCCESS) { @@ -463,74 +667,51 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectordevices()[0]->isa().targetId(), HIP_FORCE_SPIRV_CODEOBJECT); - hip_status = hipErrorInvalidValue; break; } } } while (0); - if (comgr_status != AMD_COMGR_STATUS_SUCCESS) { - LogError("comgr API call failed"); - hip_status = hipErrorInvalidValue; - } - - // Clean up file and memory resouces if hip_status failed for some reason. - if (hip_status != hipSuccess && hip_status != hipErrorInvalidKernelFile) { - ReleaseImageAndFile(); - } - - if (data_object.handle) { - if ((comgr_status = amd::Comgr::release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) { - LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status); - return hipErrorInvalidValue; - } + // release code objects + for (const auto& co : code_obj_map) { + delete[] reinterpret_cast(co.second.first); } return hip_status; } -hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devices) { - amd::ScopedLock lock(FatBinaryLock()); - - bool containGenericTarget = false; - return ExtractFatBinaryUsingCOMGR(devices, containGenericTarget); -} - hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size, size_t binary_offset) { int devID = device->deviceId(); @@ -569,55 +750,4 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) { } return hipSuccess; } - -// ================================================================================================ -hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data, - const std::vector& devices) { - hipError_t hip_status = hipSuccess; - // At this line, image should be a valid ptr. - guarantee(data != nullptr, "Image cannot be nullptr"); - - do { - std::vector> code_objs; - // Copy device names - std::vector device_names; - device_names.reserve(devices.size()); - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); - } - - hip_status = - CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0, device_names, code_objs); - if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) { - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - if (code_objs[dev_idx].first) { - hip_status = - AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, code_objs[dev_idx].second, 0); - if (hip_status != hipSuccess) { - return hip_status; - } - } else { - // This is the case of hipErrorNoBinaryForGpu which will finally fail app on device - // without code object - LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(), - device_names[dev_idx].c_str()); - } - } - } else if (hip_status == hipErrorInvalidKernelFile) { - hip_status = hipSuccess; - // If the image ptr is not clang offload bundle then just directly point the image. - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - hip_status = AddDevProgram(devices[dev_idx], data, CodeObject::ElfSize(data), 0); - if (hip_status != hipSuccess) { - return hip_status; - } - } - } else { - LogPrintfError("CodeObject::extractCodeObjectFromFatBinaryUsingComgr failed with status %d\n", - hip_status); - } - } while (0); - - return hip_status; -} } // namespace hip diff --git a/projects/clr/hipamd/src/hip_fatbin.hpp b/projects/clr/hipamd/src/hip_fatbin.hpp index fd9dc686bf..e21c018007 100644 --- a/projects/clr/hipamd/src/hip_fatbin.hpp +++ b/projects/clr/hipamd/src/hip_fatbin.hpp @@ -39,26 +39,7 @@ public: FatBinaryInfo(const char* fname, const void* image); ~FatBinaryInfo(); - // Loads Fat binary from file or image, unbundles COs for devices. - hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices, - bool &containGenericTarget); - - /** - * @brief Extract code object from fatbin using comgr unbundling action via calling - * CodeObject::extractCodeObjectFromFatBinaryUsingComgr - * - * @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed, - * compressed and even SPIR-V(to be supported later) mode. - * @param[in] devices devices whose code objects will be extracted. - * Returned error code - * - * @return #hipSuccess, #hipErrorNoBinaryForGpu, #hipErrorInvalidValue - * - * @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr() - */ - hipError_t ExtractFatBinaryUsingCOMGR(const void* data, - const std::vector& devices); - hipError_t ExtractFatBinary(const std::vector& devices); + hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices); hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size, size_t binary_offset); hipError_t BuildProgram(const int device_id);