SWDEV-433371 - Support new comgr unbundling action

Support new comgr unbundling action api to extract codebjects
in compressed and uncompressed modes.

Create HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION ENV to
toggle new path and old path.
If HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION=false(default),
   uncompressed codeobject will go old path for better perf,
   compressed   codeobject will go new path.
If HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION=true,
   both uncompressed and compressed codeobjects will go new
   path.

Add comgr wrapper for
   amd_comgr_action_info_set_bundle_entry_ids()

Change-Id: I79952f132fe21249296685ee12cae05a4f9aec32


[ROCm/clr commit: d0050ce309]
Этот коммит содержится в:
Tao Sang
2024-05-15 14:57:13 -04:00
коммит произвёл Maneesh Gupta
родитель 5436ebf0b9
Коммит 7bf8d102fc
7 изменённых файлов: 552 добавлений и 34 удалений
+397 -15
Просмотреть файл
@@ -30,21 +30,32 @@ THE SOFTWARE.
#include "hip_internal.hpp"
#include "platform/program.hpp"
#include <elf/elf.hpp>
#include "comgrctx.hpp"
namespace hip {
hipError_t ihipFree(void* ptr);
// forward declaration of methods required for managed variables
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
namespace {
constexpr char kOffloadBundleMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
// In uncompressed mode
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
sizeof(kOffloadBundleUncompressedMagicStr);
// In compressed mode
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
sizeof(kOffloadBundleCompressedMagicStr);
constexpr char kOffloadKindHip[] = "hip";
constexpr char kOffloadKindHipv4[] = "hipv4";
constexpr char kOffloadKindHcc[] = "hcc";
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
constexpr char kHipFatBinName[] = "hipfatbin";
constexpr char kHipFatBinName_[] = "hipfatbin-";
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
// ClangOFFLOADBundle info.
static constexpr size_t kOffloadBundleMagicStrSize = sizeof(kOffloadBundleMagicStr);
// Clang Offload bundler description & Header.
// Clang Offload bundler description & Header in uncompressed mode.
struct __ClangOffloadBundleInfo {
uint64_t offset;
uint64_t size;
@@ -52,16 +63,38 @@ struct __ClangOffloadBundleInfo {
const char bundleEntryId[1];
};
struct __ClangOffloadBundleHeader {
const char magic[kOffloadBundleMagicStrSize - 1];
struct __ClangOffloadBundleUncompressedHeader {
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
uint64_t numOfCodeObjects;
__ClangOffloadBundleInfo desc[1];
};
// Clang Offload bundler description & Header in compressed mode.
struct __ClangOffloadBundleCompressedHeader {
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
uint16_t versionNumber;
uint16_t compressionMethod;
uint32_t totalSize;
uint32_t uncompressedBinarySize;
uint64_t Hash;
const char compressedBinarydesc[1];
};
} // namespace
bool CodeObject::IsClangOffloadMagicBundle(const void* data) {
std::string magic(reinterpret_cast<const char*>(data), kOffloadBundleMagicStrSize - 1);
return magic.compare(kOffloadBundleMagicStr) ? false : true;
bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) {
std::string magic(reinterpret_cast<const char*>(data),
kOffloadBundleUncompressedMagicStrSize - 1);
if (!magic.compare(kOffloadBundleUncompressedMagicStr)) {
isCompressed = false;
return true;
}
std::string magic1(reinterpret_cast<const char*>(data),
kOffloadBundleCompressedMagicStrSize - 1);
if (!magic1.compare(kOffloadBundleCompressedMagicStr)) {
isCompressed = true;
return true;
}
return false;
}
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
@@ -356,7 +389,7 @@ static bool consume(std::string& input, std::string consume_) {
// Trim String till character, will be used to get gpuname
// example: input is gfx908:sram-ecc+ and trim char is :
// input will become sram-ecc+.
// input will become :sram-ecc+.
static std::string trimName(std::string& input, char trim) {
auto pos_ = input.find(trim);
auto res = input;
@@ -369,6 +402,18 @@ static std::string trimName(std::string& input, char trim) {
return res;
}
// Trim String till character, will be used to get bundle entry ID.
// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is .
// input will become amdgcn-amd-amdhsa--gfx1035
static bool trimNameTail(std::string& input, char trim) {
auto pos_ = input.rfind(trim);
if (pos_ == std::string::npos) {
return false;
}
input = input.substr(0, pos_);
return true;
}
static char getFeatureValue(std::string& input, std::string feature) {
char res = ' ';
if (consume(input, std::move(feature))) {
@@ -483,8 +528,8 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(
hipError_t CodeObject::extractCodeObjectFromFatBinary(
const void* data, const std::vector<std::string>& agent_triple_target_ids,
std::vector<std::pair<const void*, size_t>>& code_objs) {
std::string magic((const char*)data, kOffloadBundleMagicStrSize);
if (magic.compare(kOffloadBundleMagicStr)) {
std::string magic((const char*)data, kOffloadBundleUncompressedMagicStrSize);
if (magic.compare(kOffloadBundleUncompressedMagicStr)) {
return hipErrorInvalidKernelFile;
}
@@ -494,7 +539,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
code_objs.push_back(std::make_pair(nullptr, 0));
}
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const auto* desc = &obheader->desc[0];
size_t num_code_objs = code_objs.size();
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
@@ -529,7 +574,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(),
((code_objs[i].first) ? "Found" : "Not Found"));
}
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const auto* desc = &obheader->desc[0];
LogPrintfError("%s", " Bundled Code Objects:");
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
@@ -554,6 +599,343 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
}
}
// ================================================================================================
size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) {
if (isCompressed) {
const auto obheader = reinterpret_cast<const __ClangOffloadBundleCompressedHeader*>(data);
return obheader->totalSize;
} else {
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const __ClangOffloadBundleInfo* desc = &obheader->desc[0];
uint64_t i = 0;
while (++i < obheader->numOfCodeObjects) {
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize);
}
return desc->offset + desc->size;
}
}
// ================================================================================================
hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
const void* data, size_t size, const std::vector<std::string>& agent_triple_target_ids,
std::vector<std::pair<const void*, size_t>>& code_objs) {
hipError_t hipStatus = hipSuccess;
amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS;
const size_t num_devices = agent_triple_target_ids.size();
size_t num_code_objs = num_devices;
bool isCompressed = false;
if (!IsClangOffloadMagicBundle(data, isCompressed)) {
LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false", data);
// hipModuleLoadData() will possibly call here
return hipErrorInvalidKernelFile;
}
if (size == 0) size = getFatbinSize(data, isCompressed);
amd_comgr_data_t dataCodeObj{0};
amd_comgr_data_set_t dataSetBundled{0};
amd_comgr_data_set_t dataSetUnbundled{0};
amd_comgr_action_info_t actionInfoUnbundle{0};
amd_comgr_data_t item{0};
std::set<std::string> devicesSet{}; // To make sure device is unique
std::vector<const char*> bundleEntryIDs{};
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
for (size_t i = 0; i < num_devices; i++) {
devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
}
for (auto& device : devicesSet) {
bundleEntryIDs.push_back(device.c_str());
}
do {
// Create Bundled dataset
comgrStatus = amd::Comgr::create_data_set(&dataSetBundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// CodeObject
comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError(
"amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast<const char*>(data));
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError(
"amd::Comgr::set_data_name("
") failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Set up ActionInfo
comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids(
actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size());
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError(
"amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh",
bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Unbundle
comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle,
dataSetBundled, dataSetUnbundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Check CodeObject count
size_t count = 0;
comgrStatus =
amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Initialize Code objects
code_objs.reserve(num_code_objs);
for (size_t i = 0; i < num_code_objs; i++) {
code_objs.push_back(std::make_pair(nullptr, 0));
}
for (size_t i = 0; i < count; i++) {
if (num_code_objs == 0) break;
size_t itemSize = 0;
comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled,
AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
std::string bundleEntryId(itemSize, 0);
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data());
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count,
itemSize, comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
// Remove bundleEntryId_
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
// This is behavour in comgr unbundling which is subject to change.
// So just give info.
LogPrintfInfo("bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(),
kOffloadHipV4FatBinName_);
}
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
char* itemData = nullptr;
for (size_t dev = 0; dev < num_devices; ++dev) {
if (code_objs[dev].first) continue;
// LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev,
// agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
if (bundleEntryId == agent_triple_target_ids[dev]) {
if (itemData == nullptr) {
itemSize = 0;
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count,
comgrStatus);
hipStatus = hipErrorInvalidValue;
break;
}
if (itemSize == 0) {
// If there isn't a code object for this device,
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
// valid name but no data. We need continue searching for other devices
LogPrintfInfo(
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev,
agent_triple_target_ids[dev].c_str());
continue;
}
// itemData should be deleted in fatbin's destructor
itemData = new char[itemSize];
if (itemData == nullptr) {
LogError("no enough memory");
hipStatus = hipErrorOutOfMemory;
break;
}
comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count,
itemSize, comgrStatus);
hipStatus = hipErrorInvalidValue;
delete[] itemData;
itemData = nullptr;
break;
}
}
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
--num_code_objs;
LogPrintfInfo(
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), "
"Size=%zu, num_code_objs=%zu",
dev, agent_triple_target_ids[dev].c_str(), itemData,
isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs);
}
}
comgrStatus = amd::Comgr::release_data(item);
item.handle = 0;
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
}
if (hipStatus != hipSuccess) break;
}
} while (0);
if (hipStatus == hipSuccess && num_code_objs != 0) {
hipStatus = hipErrorNoBinaryForGpu;
// Leave it for debug purpose in uncompressed mode.
if (!isCompressed) {
LogPrintfError("%s",
"hipErrorNoBinaryForGpu: Unable to find code object for all current devices!");
LogPrintfError("%s", " Devices:");
for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(),
((code_objs[i].first) ? "Found" : "Not Found"));
}
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const auto* desc = &obheader->desc[0];
LogPrintfError("%s", " Bundled Code Objects:");
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) +
desc->bundleEntryIdSize)) {
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
const void* image =
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
std::string co_triple_target_id;
bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id);
if (valid_co) {
LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(),
co_triple_target_id.c_str());
} else {
LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str());
}
}
}
}
// Cleanup
if (actionInfoUnbundle.handle) {
comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (dataSetBundled.handle) {
comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (dataSetUnbundled.handle) {
comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh",
comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (dataCodeObj.handle) {
comgrStatus = amd::Comgr::release_data(dataCodeObj);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
if (item.handle) {
comgrStatus = amd::Comgr::release_data(item);
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
hipStatus = hipErrorInvalidValue;
}
}
return hipStatus;
}
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
amd::ScopedLock lock(dclock_);
+27 -3
Просмотреть файл
@@ -63,15 +63,39 @@ class CodeObject {
static uint64_t ElfSize(const void* emi);
static bool IsClangOffloadMagicBundle(const void* data);
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
protected:
// Return size of fat bin
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
/**
* @brief Extract code object from fatbin using comgr unbundling action
*
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
* compressed and even SPIR-V(to be supported later) mode.
* @param[in] size the size of the bundle data
* @param[in] agent_triple_target_ids isa names of concerned devices
* @param[out] code_objs the buffer address and size pairs of extracted code objects of
* concerned devices
* Returned error code
*
* @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue,
* #hipErrorNoBinaryForGpu
*
* @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
* const std::vector<hip::Device*>& devices)
*/
static hipError_t extractCodeObjectFromFatBinaryUsingComgr(
const void* data, size_t size, const std::vector<std::string>& devices,
std::vector<std::pair<const void*, size_t>>& code_objs);
protected:
//Given an ptr to image or file, extracts to code object
//for corresponding devices
static hipError_t extractCodeObjectFromFatBinary(const void*,
const std::vector<std::string>&,
std::vector<std::pair<const void*, size_t>>&);
CodeObject() {}
private:
friend const std::vector<hipModule_t>& modules();
+87 -5
Просмотреть файл
@@ -25,6 +25,7 @@ THE SOFTWARE.
#include <unordered_map>
#include "hip_code_object.hpp"
#include "hip_platform.hpp"
#include "comgrctx.hpp"
namespace hip {
@@ -50,14 +51,21 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : fdesc_(amd:
}
FatBinaryInfo::~FatBinaryInfo() {
// Different devices in the same model have the same binary_image_
std::set<const void*> toDelete;
// Release per device fat bin info.
for (auto* fbd: fatbin_dev_info_) {
if (fbd != nullptr) {
if (fbd->binary_image_ && fbd->binary_offset_ == 0 && fbd->binary_image_ != image_) {
toDelete.insert(fbd->binary_image_);
}
delete fbd;
}
}
for (auto itemData : toDelete) {
LogPrintfInfo("~FatBinaryInfo(%p) will delete binary_image_ %p", this, itemData);
delete[] reinterpret_cast<const char*>(itemData);
}
if (!HIP_USE_RUNTIME_UNBUNDLER) {
// Using COMGR Unbundler
if (ufd_ && amd::Os::isValidFileDesc(ufd_->fdesc_)) {
@@ -163,9 +171,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
fname_.c_str());
do {
bool isCompressed = false;
// If the image ptr is not clang offload bundle then just directly point the image.
if (!CodeObject::IsClangOffloadMagicBundle(image_)) {
if (!CodeObject::IsClangOffloadMagicBundle(image_, isCompressed)) {
for (size_t dev_idx=0; dev_idx < devices.size(); ++dev_idx) {
fatbin_dev_info_[devices[dev_idx]->deviceId()]
= new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0);
@@ -178,7 +186,22 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
}
break;
}
if (isCompressed || HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) {
size_t major = 0, minor = 0;
amd::Comgr::get_version(&major, &minor);
if (major >= 2 && minor >= 8) {
hip_status = ExtractFatBinaryUsingCOMGR(image_, devices);
break;
} else if (isCompressed) {
LogPrintfError(
"comgr %zu.%zu cannot support commpressed mode which need comgr 2.8+", major, minor);
hip_status = hipErrorNotSupported;
break;
} else if (HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) {
HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION = false;
LogInfo("HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION = true only works on comgr 2.8+");
}
}
// Create a data object, if it fails return error
if ((comgr_status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object))
!= AMD_COMGR_STATUS_SUCCESS) {
@@ -442,4 +465,63 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) {
return hipSuccess;
}
// ================================================================================================
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data,
const std::vector<hip::Device*>& devices) {
hipError_t hip_status = hipSuccess;
// At this line, image should be a valid ptr.
guarantee(data != nullptr, "Image cannot be nullptr");
do {
std::vector<std::pair<const void*, size_t>> code_objs;
// Copy device names
std::vector<std::string> device_names;
device_names.reserve(devices.size());
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
}
hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0,
device_names, code_objs);
if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) {
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
if (code_objs[dev_idx].first) {
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
break;
}
} else {
// This is the case of hipErrorNoBinaryForGpu which will finally fail app on device
// without code object
LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(),
device_names[dev_idx].c_str());
}
}
} else if (hip_status == hipErrorInvalidKernelFile) {
hip_status = hipSuccess;
// If the image ptr is not clang offload bundle then just directly point the image.
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(data, CodeObject::ElfSize(data), 0);
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) {
hip_status = hipErrorOutOfMemory;
break;
}
}
} else {
LogPrintfError(
"CodeObject::extractCodeObjectFromFatBinaryUsingComgr failed with status %d\n",
hip_status);
}
} while (0);
return hip_status;
}
} //namespace : hip
+16
Просмотреть файл
@@ -65,6 +65,22 @@ public:
// Loads Fat binary from file or image, unbundles COs for devices.
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
/**
* @brief Extract code object from fatbin using comgr unbundling action via calling
* CodeObject::extractCodeObjectFromFatBinaryUsingComgr
*
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
* compressed and even SPIR-V(to be supported later) mode.
* @param[in] devices devices whose code objects will be extracted.
* Returned error code
*
* @return #hipSuccess, #hipErrorNoBinaryForGpu, #hipErrorInvalidValue
*
* @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr()
*/
hipError_t ExtractFatBinaryUsingCOMGR(const void* data,
const std::vector<hip::Device*>& devices);
hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices);
hipError_t AddDevProgram(const int device_id);
hipError_t BuildProgram(const int device_id);
+5 -1
Просмотреть файл
@@ -114,12 +114,16 @@ bool Comgr::LoadLib(bool is_versioned) {
GET_COMGR_SYMBOL(amd_comgr_iterate_symbols)
GET_COMGR_SYMBOL(amd_comgr_symbol_lookup)
GET_COMGR_SYMBOL(amd_comgr_symbol_get_info)
GET_COMGR_OPTIONAL_SYMBOL(amd_comgr_demangle_symbol_name)
GET_COMGR_SYMBOL(amd_comgr_demangle_symbol_name)
GET_COMGR_SYMBOL(amd_comgr_populate_mangled_names)
GET_COMGR_SYMBOL(amd_comgr_get_mangled_name)
GET_COMGR_SYMBOL(amd_comgr_populate_name_expression_map)
GET_COMGR_SYMBOL(amd_comgr_map_name_expression_to_symbol_name)
GET_COMGR_OPTIONAL_SYMBOL(amd_comgr_action_info_set_bundle_entry_ids)
is_ready_ = true;
size_t major = 0, minor = 0;
get_version(&major, &minor);
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loaded COMGR library version %zu.%zu.", major, minor);
return true;
}
+18 -10
Просмотреть файл
@@ -76,6 +76,7 @@ typedef amd_comgr_status_t (*t_amd_comgr_populate_mangled_names)(amd_comgr_data_
typedef amd_comgr_status_t (*t_amd_comgr_get_mangled_name)(amd_comgr_data_t data, size_t index, size_t *size, char *mangled_name);
typedef amd_comgr_status_t (*t_amd_comgr_populate_name_expression_map)(amd_comgr_data_t data, size_t *count);
typedef amd_comgr_status_t (*t_amd_comgr_map_name_expression_to_symbol_name)(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name);
typedef amd_comgr_status_t (*t_amd_comgr_action_info_set_bundle_entry_ids)(amd_comgr_action_info_t action_info, const char* bundle_entry_ids[], size_t count);
struct ComgrEntryPoints {
void* handle;
@@ -129,13 +130,16 @@ struct ComgrEntryPoints {
t_amd_comgr_get_mangled_name amd_comgr_get_mangled_name;
t_amd_comgr_populate_name_expression_map amd_comgr_populate_name_expression_map;
t_amd_comgr_map_name_expression_to_symbol_name amd_comgr_map_name_expression_to_symbol_name;
t_amd_comgr_action_info_set_bundle_entry_ids amd_comgr_action_info_set_bundle_entry_ids;
};
#ifdef COMGR_DYN_DLL
#define COMGR_DYN(NAME) cep_.NAME
#define GET_COMGR_SYMBOL(NAME) cep_.NAME = \
reinterpret_cast<t_##NAME>(Os::getSymbol(cep_.handle, #NAME)); \
if (nullptr == cep_.NAME) { return false; }
if (nullptr == cep_.NAME) { \
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "Failed to load COMGR function %s", #NAME); \
return false; }
#define GET_COMGR_OPTIONAL_SYMBOL(NAME) cep_.NAME = \
reinterpret_cast<t_##NAME>(Os::getSymbol(cep_.handle, #NAME));
#else
@@ -289,13 +293,6 @@ public:
}
static amd_comgr_status_t demangle_symbol_name(amd_comgr_data_t MangledSymbolName,
amd_comgr_data_t* DemangledSymbolName) {
#if defined(COMGR_DYN_DLL)
if (cep_.amd_comgr_demangle_symbol_name == nullptr) {
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
"Failed to load COMGR function amd_comgr_demangle_symbol_name");
return AMD_COMGR_STATUS_ERROR;
}
#endif
return COMGR_DYN(amd_comgr_demangle_symbol_name)(MangledSymbolName, DemangledSymbolName);
}
static amd_comgr_status_t populate_mangled_names(amd_comgr_data_t data, size_t *count) {
@@ -310,8 +307,19 @@ public:
static amd_comgr_status_t map_name_expression_to_symbol_name(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name) {
return COMGR_DYN(amd_comgr_map_name_expression_to_symbol_name)(data, size, name_expression, symbol_name);
}
static amd_comgr_status_t action_info_set_bundle_entry_ids(amd_comgr_action_info_t action_info,
const char* bundle_entry_ids[], size_t count) {
#if defined(COMGR_DYN_DLL)
if (cep_.amd_comgr_action_info_set_bundle_entry_ids == nullptr) {
// comgr version 2.7 or less is loaded
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
"Failed to load COMGR function amd_comgr_action_info_set_bundle_entry_ids");
return AMD_COMGR_STATUS_ERROR;
}
#endif
return COMGR_DYN(amd_comgr_action_info_set_bundle_entry_ids)(action_info, bundle_entry_ids,
count);
}
private:
static ComgrEntryPoints cep_;
static bool is_ready_;
+2
Просмотреть файл
@@ -249,6 +249,8 @@ release(bool, HIP_VMEM_MANAGE_SUPPORT, true, \
"Virtual Memory Management Support") \
release(bool, DEBUG_HIP_GRAPH_DOT_PRINT, false, \
"Enable/Disable graph debug dot print dump") \
release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \
"Force to always use new comgr unbundling action") \
namespace amd {