SWDEV-433371 - Support new comgr unbundling action
Support new comgr unbundling action api to extract codebjects
in compressed and uncompressed modes.
Create HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION ENV to
toggle new path and old path.
If HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION=false(default),
uncompressed codeobject will go old path for better perf,
compressed codeobject will go new path.
If HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION=true,
both uncompressed and compressed codeobjects will go new
path.
Add comgr wrapper for
amd_comgr_action_info_set_bundle_entry_ids()
Change-Id: I79952f132fe21249296685ee12cae05a4f9aec32
[ROCm/clr commit: d0050ce309]
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
5436ebf0b9
Коммит
7bf8d102fc
@@ -30,21 +30,32 @@ THE SOFTWARE.
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include <elf/elf.hpp>
|
||||
#include "comgrctx.hpp"
|
||||
namespace hip {
|
||||
hipError_t ihipFree(void* ptr);
|
||||
// forward declaration of methods required for managed variables
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
namespace {
|
||||
constexpr char kOffloadBundleMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
|
||||
// In uncompressed mode
|
||||
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
|
||||
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleUncompressedMagicStr);
|
||||
|
||||
// In compressed mode
|
||||
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
|
||||
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleCompressedMagicStr);
|
||||
|
||||
constexpr char kOffloadKindHip[] = "hip";
|
||||
constexpr char kOffloadKindHipv4[] = "hipv4";
|
||||
constexpr char kOffloadKindHcc[] = "hcc";
|
||||
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
|
||||
constexpr char kHipFatBinName[] = "hipfatbin";
|
||||
constexpr char kHipFatBinName_[] = "hipfatbin-";
|
||||
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
|
||||
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
|
||||
|
||||
// ClangOFFLOADBundle info.
|
||||
static constexpr size_t kOffloadBundleMagicStrSize = sizeof(kOffloadBundleMagicStr);
|
||||
|
||||
// Clang Offload bundler description & Header.
|
||||
// Clang Offload bundler description & Header in uncompressed mode.
|
||||
struct __ClangOffloadBundleInfo {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
@@ -52,16 +63,38 @@ struct __ClangOffloadBundleInfo {
|
||||
const char bundleEntryId[1];
|
||||
};
|
||||
|
||||
struct __ClangOffloadBundleHeader {
|
||||
const char magic[kOffloadBundleMagicStrSize - 1];
|
||||
struct __ClangOffloadBundleUncompressedHeader {
|
||||
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
|
||||
uint64_t numOfCodeObjects;
|
||||
__ClangOffloadBundleInfo desc[1];
|
||||
};
|
||||
|
||||
// Clang Offload bundler description & Header in compressed mode.
|
||||
struct __ClangOffloadBundleCompressedHeader {
|
||||
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
|
||||
uint16_t versionNumber;
|
||||
uint16_t compressionMethod;
|
||||
uint32_t totalSize;
|
||||
uint32_t uncompressedBinarySize;
|
||||
uint64_t Hash;
|
||||
const char compressedBinarydesc[1];
|
||||
};
|
||||
} // namespace
|
||||
|
||||
bool CodeObject::IsClangOffloadMagicBundle(const void* data) {
|
||||
std::string magic(reinterpret_cast<const char*>(data), kOffloadBundleMagicStrSize - 1);
|
||||
return magic.compare(kOffloadBundleMagicStr) ? false : true;
|
||||
bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) {
|
||||
std::string magic(reinterpret_cast<const char*>(data),
|
||||
kOffloadBundleUncompressedMagicStrSize - 1);
|
||||
if (!magic.compare(kOffloadBundleUncompressedMagicStr)) {
|
||||
isCompressed = false;
|
||||
return true;
|
||||
}
|
||||
std::string magic1(reinterpret_cast<const char*>(data),
|
||||
kOffloadBundleCompressedMagicStrSize - 1);
|
||||
if (!magic1.compare(kOffloadBundleCompressedMagicStr)) {
|
||||
isCompressed = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
|
||||
@@ -356,7 +389,7 @@ static bool consume(std::string& input, std::string consume_) {
|
||||
|
||||
// Trim String till character, will be used to get gpuname
|
||||
// example: input is gfx908:sram-ecc+ and trim char is :
|
||||
// input will become sram-ecc+.
|
||||
// input will become :sram-ecc+.
|
||||
static std::string trimName(std::string& input, char trim) {
|
||||
auto pos_ = input.find(trim);
|
||||
auto res = input;
|
||||
@@ -369,6 +402,18 @@ static std::string trimName(std::string& input, char trim) {
|
||||
return res;
|
||||
}
|
||||
|
||||
// Trim String till character, will be used to get bundle entry ID.
|
||||
// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is .
|
||||
// input will become amdgcn-amd-amdhsa--gfx1035
|
||||
static bool trimNameTail(std::string& input, char trim) {
|
||||
auto pos_ = input.rfind(trim);
|
||||
if (pos_ == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
input = input.substr(0, pos_);
|
||||
return true;
|
||||
}
|
||||
|
||||
static char getFeatureValue(std::string& input, std::string feature) {
|
||||
char res = ' ';
|
||||
if (consume(input, std::move(feature))) {
|
||||
@@ -483,8 +528,8 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
const void* data, const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
std::string magic((const char*)data, kOffloadBundleMagicStrSize);
|
||||
if (magic.compare(kOffloadBundleMagicStr)) {
|
||||
std::string magic((const char*)data, kOffloadBundleUncompressedMagicStrSize);
|
||||
if (magic.compare(kOffloadBundleUncompressedMagicStr)) {
|
||||
return hipErrorInvalidKernelFile;
|
||||
}
|
||||
|
||||
@@ -494,7 +539,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
code_objs.push_back(std::make_pair(nullptr, 0));
|
||||
}
|
||||
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
size_t num_code_objs = code_objs.size();
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
@@ -529,7 +574,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(),
|
||||
((code_objs[i].first) ? "Found" : "Not Found"));
|
||||
}
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
LogPrintfError("%s", " Bundled Code Objects:");
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
@@ -554,6 +599,343 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) {
|
||||
if (isCompressed) {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleCompressedHeader*>(data);
|
||||
return obheader->totalSize;
|
||||
} else {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const __ClangOffloadBundleInfo* desc = &obheader->desc[0];
|
||||
uint64_t i = 0;
|
||||
while (++i < obheader->numOfCodeObjects) {
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize);
|
||||
}
|
||||
return desc->offset + desc->size;
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
const void* data, size_t size, const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
hipError_t hipStatus = hipSuccess;
|
||||
amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS;
|
||||
|
||||
const size_t num_devices = agent_triple_target_ids.size();
|
||||
size_t num_code_objs = num_devices;
|
||||
bool isCompressed = false;
|
||||
if (!IsClangOffloadMagicBundle(data, isCompressed)) {
|
||||
LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false", data);
|
||||
// hipModuleLoadData() will possibly call here
|
||||
return hipErrorInvalidKernelFile;
|
||||
}
|
||||
|
||||
if (size == 0) size = getFatbinSize(data, isCompressed);
|
||||
|
||||
amd_comgr_data_t dataCodeObj{0};
|
||||
amd_comgr_data_set_t dataSetBundled{0};
|
||||
amd_comgr_data_set_t dataSetUnbundled{0};
|
||||
amd_comgr_action_info_t actionInfoUnbundle{0};
|
||||
amd_comgr_data_t item{0};
|
||||
|
||||
|
||||
std::set<std::string> devicesSet{}; // To make sure device is unique
|
||||
std::vector<const char*> bundleEntryIDs{};
|
||||
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
|
||||
for (size_t i = 0; i < num_devices; i++) {
|
||||
devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
|
||||
}
|
||||
|
||||
for (auto& device : devicesSet) {
|
||||
bundleEntryIDs.push_back(device.c_str());
|
||||
}
|
||||
|
||||
do {
|
||||
// Create Bundled dataset
|
||||
comgrStatus = amd::Comgr::create_data_set(&dataSetBundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// CodeObject
|
||||
comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast<const char*>(data));
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::set_data_name("
|
||||
") failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
// Set up ActionInfo
|
||||
comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids(
|
||||
actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size());
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh",
|
||||
bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Unbundle
|
||||
comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle,
|
||||
dataSetBundled, dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check CodeObject count
|
||||
size_t count = 0;
|
||||
comgrStatus =
|
||||
amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Initialize Code objects
|
||||
code_objs.reserve(num_code_objs);
|
||||
for (size_t i = 0; i < num_code_objs; i++) {
|
||||
code_objs.push_back(std::make_pair(nullptr, 0));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
if (num_code_objs == 0) break;
|
||||
|
||||
size_t itemSize = 0;
|
||||
comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled,
|
||||
AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
std::string bundleEntryId(itemSize, 0);
|
||||
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data());
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count,
|
||||
itemSize, comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
// Remove bundleEntryId_
|
||||
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
|
||||
// This is behavour in comgr unbundling which is subject to change.
|
||||
// So just give info.
|
||||
LogPrintfInfo("bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(),
|
||||
kOffloadHipV4FatBinName_);
|
||||
}
|
||||
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
|
||||
|
||||
char* itemData = nullptr;
|
||||
for (size_t dev = 0; dev < num_devices; ++dev) {
|
||||
if (code_objs[dev].first) continue;
|
||||
// LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev,
|
||||
// agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
|
||||
|
||||
if (bundleEntryId == agent_triple_target_ids[dev]) {
|
||||
if (itemData == nullptr) {
|
||||
itemSize = 0;
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
if (itemSize == 0) {
|
||||
// If there isn't a code object for this device,
|
||||
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
|
||||
// valid name but no data. We need continue searching for other devices
|
||||
LogPrintfInfo(
|
||||
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev,
|
||||
agent_triple_target_ids[dev].c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
// itemData should be deleted in fatbin's destructor
|
||||
itemData = new char[itemSize];
|
||||
if (itemData == nullptr) {
|
||||
LogError("no enough memory");
|
||||
hipStatus = hipErrorOutOfMemory;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count,
|
||||
itemSize, comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
delete[] itemData;
|
||||
itemData = nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
|
||||
--num_code_objs;
|
||||
LogPrintfInfo(
|
||||
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), "
|
||||
"Size=%zu, num_code_objs=%zu",
|
||||
dev, agent_triple_target_ids[dev].c_str(), itemData,
|
||||
isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs);
|
||||
}
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::release_data(item);
|
||||
item.handle = 0;
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
if (hipStatus != hipSuccess) break;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
if (hipStatus == hipSuccess && num_code_objs != 0) {
|
||||
hipStatus = hipErrorNoBinaryForGpu;
|
||||
|
||||
// Leave it for debug purpose in uncompressed mode.
|
||||
if (!isCompressed) {
|
||||
LogPrintfError("%s",
|
||||
"hipErrorNoBinaryForGpu: Unable to find code object for all current devices!");
|
||||
LogPrintfError("%s", " Devices:");
|
||||
for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
|
||||
LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(),
|
||||
((code_objs[i].first) ? "Found" : "Not Found"));
|
||||
}
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
LogPrintfError("%s", " Bundled Code Objects:");
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) +
|
||||
desc->bundleEntryIdSize)) {
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
|
||||
std::string co_triple_target_id;
|
||||
bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id);
|
||||
|
||||
if (valid_co) {
|
||||
LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(),
|
||||
co_triple_target_id.c_str());
|
||||
} else {
|
||||
LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
if (actionInfoUnbundle.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
if (dataSetBundled.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataSetUnbundled.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataCodeObj.handle) {
|
||||
comgrStatus = amd::Comgr::release_data(dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (item.handle) {
|
||||
comgrStatus = amd::Comgr::release_data(item);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
return hipStatus;
|
||||
}
|
||||
|
||||
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
|
||||
amd::ScopedLock lock(dclock_);
|
||||
|
||||
|
||||
@@ -63,15 +63,39 @@ class CodeObject {
|
||||
|
||||
static uint64_t ElfSize(const void* emi);
|
||||
|
||||
static bool IsClangOffloadMagicBundle(const void* data);
|
||||
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
|
||||
|
||||
protected:
|
||||
// Return size of fat bin
|
||||
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
|
||||
|
||||
/**
|
||||
* @brief Extract code object from fatbin using comgr unbundling action
|
||||
*
|
||||
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
|
||||
* compressed and even SPIR-V(to be supported later) mode.
|
||||
* @param[in] size the size of the bundle data
|
||||
* @param[in] agent_triple_target_ids isa names of concerned devices
|
||||
* @param[out] code_objs the buffer address and size pairs of extracted code objects of
|
||||
* concerned devices
|
||||
* Returned error code
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue,
|
||||
* #hipErrorNoBinaryForGpu
|
||||
*
|
||||
* @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
|
||||
* const std::vector<hip::Device*>& devices)
|
||||
*/
|
||||
static hipError_t extractCodeObjectFromFatBinaryUsingComgr(
|
||||
const void* data, size_t size, const std::vector<std::string>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
|
||||
protected:
|
||||
//Given an ptr to image or file, extracts to code object
|
||||
//for corresponding devices
|
||||
static hipError_t extractCodeObjectFromFatBinary(const void*,
|
||||
const std::vector<std::string>&,
|
||||
std::vector<std::pair<const void*, size_t>>&);
|
||||
|
||||
|
||||
CodeObject() {}
|
||||
private:
|
||||
friend const std::vector<hipModule_t>& modules();
|
||||
|
||||
@@ -25,6 +25,7 @@ THE SOFTWARE.
|
||||
#include <unordered_map>
|
||||
#include "hip_code_object.hpp"
|
||||
#include "hip_platform.hpp"
|
||||
#include "comgrctx.hpp"
|
||||
|
||||
namespace hip {
|
||||
|
||||
@@ -50,14 +51,21 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : fdesc_(amd:
|
||||
}
|
||||
|
||||
FatBinaryInfo::~FatBinaryInfo() {
|
||||
|
||||
// Different devices in the same model have the same binary_image_
|
||||
std::set<const void*> toDelete;
|
||||
// Release per device fat bin info.
|
||||
for (auto* fbd: fatbin_dev_info_) {
|
||||
if (fbd != nullptr) {
|
||||
if (fbd->binary_image_ && fbd->binary_offset_ == 0 && fbd->binary_image_ != image_) {
|
||||
toDelete.insert(fbd->binary_image_);
|
||||
}
|
||||
delete fbd;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto itemData : toDelete) {
|
||||
LogPrintfInfo("~FatBinaryInfo(%p) will delete binary_image_ %p", this, itemData);
|
||||
delete[] reinterpret_cast<const char*>(itemData);
|
||||
}
|
||||
if (!HIP_USE_RUNTIME_UNBUNDLER) {
|
||||
// Using COMGR Unbundler
|
||||
if (ufd_ && amd::Os::isValidFileDesc(ufd_->fdesc_)) {
|
||||
@@ -163,9 +171,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
fname_.c_str());
|
||||
|
||||
do {
|
||||
|
||||
bool isCompressed = false;
|
||||
// If the image ptr is not clang offload bundle then just directly point the image.
|
||||
if (!CodeObject::IsClangOffloadMagicBundle(image_)) {
|
||||
if (!CodeObject::IsClangOffloadMagicBundle(image_, isCompressed)) {
|
||||
for (size_t dev_idx=0; dev_idx < devices.size(); ++dev_idx) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]
|
||||
= new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0);
|
||||
@@ -178,7 +186,22 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (isCompressed || HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) {
|
||||
size_t major = 0, minor = 0;
|
||||
amd::Comgr::get_version(&major, &minor);
|
||||
if (major >= 2 && minor >= 8) {
|
||||
hip_status = ExtractFatBinaryUsingCOMGR(image_, devices);
|
||||
break;
|
||||
} else if (isCompressed) {
|
||||
LogPrintfError(
|
||||
"comgr %zu.%zu cannot support commpressed mode which need comgr 2.8+", major, minor);
|
||||
hip_status = hipErrorNotSupported;
|
||||
break;
|
||||
} else if (HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) {
|
||||
HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION = false;
|
||||
LogInfo("HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION = true only works on comgr 2.8+");
|
||||
}
|
||||
}
|
||||
// Create a data object, if it fails return error
|
||||
if ((comgr_status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object))
|
||||
!= AMD_COMGR_STATUS_SUCCESS) {
|
||||
@@ -442,4 +465,63 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data,
|
||||
const std::vector<hip::Device*>& devices) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// At this line, image should be a valid ptr.
|
||||
guarantee(data != nullptr, "Image cannot be nullptr");
|
||||
|
||||
do {
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
// Copy device names
|
||||
std::vector<std::string> device_names;
|
||||
device_names.reserve(devices.size());
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
|
||||
}
|
||||
|
||||
hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0,
|
||||
device_names, code_objs);
|
||||
if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) {
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
if (code_objs[dev_idx].first) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
|
||||
new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
|
||||
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
|
||||
new amd::Program(*devices[dev_idx]->asContext());
|
||||
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// This is the case of hipErrorNoBinaryForGpu which will finally fail app on device
|
||||
// without code object
|
||||
LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(),
|
||||
device_names[dev_idx].c_str());
|
||||
}
|
||||
}
|
||||
} else if (hip_status == hipErrorInvalidKernelFile) {
|
||||
hip_status = hipSuccess;
|
||||
// If the image ptr is not clang offload bundle then just directly point the image.
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
|
||||
new FatBinaryDeviceInfo(data, CodeObject::ElfSize(data), 0);
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
|
||||
new amd::Program(*devices[dev_idx]->asContext());
|
||||
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) {
|
||||
hip_status = hipErrorOutOfMemory;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LogPrintfError(
|
||||
"CodeObject::extractCodeObjectFromFatBinaryUsingComgr failed with status %d\n",
|
||||
hip_status);
|
||||
}
|
||||
} while (0);
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
} //namespace : hip
|
||||
|
||||
@@ -65,6 +65,22 @@ public:
|
||||
|
||||
// Loads Fat binary from file or image, unbundles COs for devices.
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
|
||||
|
||||
/**
|
||||
* @brief Extract code object from fatbin using comgr unbundling action via calling
|
||||
* CodeObject::extractCodeObjectFromFatBinaryUsingComgr
|
||||
*
|
||||
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
|
||||
* compressed and even SPIR-V(to be supported later) mode.
|
||||
* @param[in] devices devices whose code objects will be extracted.
|
||||
* Returned error code
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorNoBinaryForGpu, #hipErrorInvalidValue
|
||||
*
|
||||
* @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr()
|
||||
*/
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const void* data,
|
||||
const std::vector<hip::Device*>& devices);
|
||||
hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices);
|
||||
hipError_t AddDevProgram(const int device_id);
|
||||
hipError_t BuildProgram(const int device_id);
|
||||
|
||||
@@ -114,12 +114,16 @@ bool Comgr::LoadLib(bool is_versioned) {
|
||||
GET_COMGR_SYMBOL(amd_comgr_iterate_symbols)
|
||||
GET_COMGR_SYMBOL(amd_comgr_symbol_lookup)
|
||||
GET_COMGR_SYMBOL(amd_comgr_symbol_get_info)
|
||||
GET_COMGR_OPTIONAL_SYMBOL(amd_comgr_demangle_symbol_name)
|
||||
GET_COMGR_SYMBOL(amd_comgr_demangle_symbol_name)
|
||||
GET_COMGR_SYMBOL(amd_comgr_populate_mangled_names)
|
||||
GET_COMGR_SYMBOL(amd_comgr_get_mangled_name)
|
||||
GET_COMGR_SYMBOL(amd_comgr_populate_name_expression_map)
|
||||
GET_COMGR_SYMBOL(amd_comgr_map_name_expression_to_symbol_name)
|
||||
GET_COMGR_OPTIONAL_SYMBOL(amd_comgr_action_info_set_bundle_entry_ids)
|
||||
is_ready_ = true;
|
||||
size_t major = 0, minor = 0;
|
||||
get_version(&major, &minor);
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loaded COMGR library version %zu.%zu.", major, minor);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@ typedef amd_comgr_status_t (*t_amd_comgr_populate_mangled_names)(amd_comgr_data_
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_get_mangled_name)(amd_comgr_data_t data, size_t index, size_t *size, char *mangled_name);
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_populate_name_expression_map)(amd_comgr_data_t data, size_t *count);
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_map_name_expression_to_symbol_name)(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name);
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_action_info_set_bundle_entry_ids)(amd_comgr_action_info_t action_info, const char* bundle_entry_ids[], size_t count);
|
||||
|
||||
struct ComgrEntryPoints {
|
||||
void* handle;
|
||||
@@ -129,13 +130,16 @@ struct ComgrEntryPoints {
|
||||
t_amd_comgr_get_mangled_name amd_comgr_get_mangled_name;
|
||||
t_amd_comgr_populate_name_expression_map amd_comgr_populate_name_expression_map;
|
||||
t_amd_comgr_map_name_expression_to_symbol_name amd_comgr_map_name_expression_to_symbol_name;
|
||||
t_amd_comgr_action_info_set_bundle_entry_ids amd_comgr_action_info_set_bundle_entry_ids;
|
||||
};
|
||||
|
||||
#ifdef COMGR_DYN_DLL
|
||||
#define COMGR_DYN(NAME) cep_.NAME
|
||||
#define GET_COMGR_SYMBOL(NAME) cep_.NAME = \
|
||||
reinterpret_cast<t_##NAME>(Os::getSymbol(cep_.handle, #NAME)); \
|
||||
if (nullptr == cep_.NAME) { return false; }
|
||||
if (nullptr == cep_.NAME) { \
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "Failed to load COMGR function %s", #NAME); \
|
||||
return false; }
|
||||
#define GET_COMGR_OPTIONAL_SYMBOL(NAME) cep_.NAME = \
|
||||
reinterpret_cast<t_##NAME>(Os::getSymbol(cep_.handle, #NAME));
|
||||
#else
|
||||
@@ -289,13 +293,6 @@ public:
|
||||
}
|
||||
static amd_comgr_status_t demangle_symbol_name(amd_comgr_data_t MangledSymbolName,
|
||||
amd_comgr_data_t* DemangledSymbolName) {
|
||||
#if defined(COMGR_DYN_DLL)
|
||||
if (cep_.amd_comgr_demangle_symbol_name == nullptr) {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
|
||||
"Failed to load COMGR function amd_comgr_demangle_symbol_name");
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
#endif
|
||||
return COMGR_DYN(amd_comgr_demangle_symbol_name)(MangledSymbolName, DemangledSymbolName);
|
||||
}
|
||||
static amd_comgr_status_t populate_mangled_names(amd_comgr_data_t data, size_t *count) {
|
||||
@@ -310,8 +307,19 @@ public:
|
||||
static amd_comgr_status_t map_name_expression_to_symbol_name(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name) {
|
||||
return COMGR_DYN(amd_comgr_map_name_expression_to_symbol_name)(data, size, name_expression, symbol_name);
|
||||
}
|
||||
|
||||
|
||||
static amd_comgr_status_t action_info_set_bundle_entry_ids(amd_comgr_action_info_t action_info,
|
||||
const char* bundle_entry_ids[], size_t count) {
|
||||
#if defined(COMGR_DYN_DLL)
|
||||
if (cep_.amd_comgr_action_info_set_bundle_entry_ids == nullptr) {
|
||||
// comgr version 2.7 or less is loaded
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
|
||||
"Failed to load COMGR function amd_comgr_action_info_set_bundle_entry_ids");
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
#endif
|
||||
return COMGR_DYN(amd_comgr_action_info_set_bundle_entry_ids)(action_info, bundle_entry_ids,
|
||||
count);
|
||||
}
|
||||
private:
|
||||
static ComgrEntryPoints cep_;
|
||||
static bool is_ready_;
|
||||
|
||||
@@ -249,6 +249,8 @@ release(bool, HIP_VMEM_MANAGE_SUPPORT, true, \
|
||||
"Virtual Memory Management Support") \
|
||||
release(bool, DEBUG_HIP_GRAPH_DOT_PRINT, false, \
|
||||
"Enable/Disable graph debug dot print dump") \
|
||||
release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \
|
||||
"Force to always use new comgr unbundling action") \
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user