SWDEV-433371 - use comgr to unbundle code objects
1.Make runtime use comgr to unbundle code objects 2.Support compressed/uncompressed modes 3.Remove HIP_USE_RUNTIME_UNBUNDLER and HIPRTC_USE_RUNTIME_UNBUNDLER to simplify logics 4.Add comgr wrapper for amd_comgr_action_info_set_bundle_entry_ids() Change-Id: Ic41b1ad1b64cca1e31986437983a5146d52a7329
This commit is contained in:
@@ -30,21 +30,34 @@ THE SOFTWARE.
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include <elf/elf.hpp>
|
||||
#include "comgrctx.hpp"
|
||||
|
||||
namespace hip {
|
||||
hipError_t ihipFree(void* ptr);
|
||||
// forward declaration of methods required for managed variables
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
namespace {
|
||||
constexpr char kOffloadBundleMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
|
||||
// In uncompressed mode
|
||||
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
|
||||
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleUncompressedMagicStr);
|
||||
|
||||
//In compressed mode
|
||||
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
|
||||
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleCompressedMagicStr);
|
||||
|
||||
constexpr char kOffloadKindHip[] = "hip";
|
||||
constexpr char kOffloadKindHipv4[] = "hipv4";
|
||||
constexpr char kOffloadKindHcc[] = "hcc";
|
||||
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
|
||||
|
||||
constexpr char kHipFatBinName[] = "hipfatbin";
|
||||
constexpr char kHipFatBinName_[] = "hipfatbin-";
|
||||
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
|
||||
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
|
||||
// ClangOFFLOADBundle info.
|
||||
static constexpr size_t kOffloadBundleMagicStrSize = sizeof(kOffloadBundleMagicStr);
|
||||
|
||||
// Clang Offload bundler description & Header.
|
||||
// Clang Offload bundler description & Header in uncompressed mode.
|
||||
struct __ClangOffloadBundleInfo {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
@@ -52,16 +65,37 @@ struct __ClangOffloadBundleInfo {
|
||||
const char bundleEntryId[1];
|
||||
};
|
||||
|
||||
struct __ClangOffloadBundleHeader {
|
||||
const char magic[kOffloadBundleMagicStrSize - 1];
|
||||
struct __ClangOffloadBundleUncompressedHeader {
|
||||
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
|
||||
uint64_t numOfCodeObjects;
|
||||
__ClangOffloadBundleInfo desc[1];
|
||||
};
|
||||
|
||||
struct __ClangOffloadBundleCompressedHeader {
|
||||
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
|
||||
uint16_t versionNumber;
|
||||
uint16_t compressionMethod;
|
||||
uint32_t totalSize;
|
||||
uint32_t uncompressedBinarySize;
|
||||
uint64_t Hash;
|
||||
const char compressedBinarydesc[1];
|
||||
};
|
||||
} // namespace
|
||||
|
||||
bool CodeObject::IsClangOffloadMagicBundle(const void* data) {
|
||||
std::string magic(reinterpret_cast<const char*>(data), kOffloadBundleMagicStrSize - 1);
|
||||
return magic.compare(kOffloadBundleMagicStr) ? false : true;
|
||||
bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool &isCompressed) {
|
||||
std::string magic(reinterpret_cast<const char*>(data),
|
||||
kOffloadBundleUncompressedMagicStrSize - 1);
|
||||
if (!magic.compare(kOffloadBundleUncompressedMagicStr)) {
|
||||
isCompressed = false;
|
||||
return true;
|
||||
}
|
||||
std::string magic1(reinterpret_cast<const char*>(data),
|
||||
kOffloadBundleCompressedMagicStrSize - 1);
|
||||
if (!magic1.compare(kOffloadBundleCompressedMagicStr)) {
|
||||
isCompressed = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
|
||||
@@ -356,7 +390,7 @@ static bool consume(std::string& input, std::string consume_) {
|
||||
|
||||
// Trim String till character, will be used to get gpuname
|
||||
// example: input is gfx908:sram-ecc+ and trim char is :
|
||||
// input will become sram-ecc+.
|
||||
// input will become :sram-ecc+
|
||||
static std::string trimName(std::string& input, char trim) {
|
||||
auto pos_ = input.find(trim);
|
||||
auto res = input;
|
||||
@@ -369,6 +403,18 @@ static std::string trimName(std::string& input, char trim) {
|
||||
return res;
|
||||
}
|
||||
|
||||
// Trim String till character, will be used to get bundle entry ID.
|
||||
// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is .
|
||||
// input will become amdgcn-amd-amdhsa--gfx1035
|
||||
static bool trimNameTail(std::string& input, char trim) {
|
||||
auto pos_ = input.rfind(trim);
|
||||
if (pos_ == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
input = input.substr(0, pos_);
|
||||
return true;
|
||||
}
|
||||
|
||||
static char getFeatureValue(std::string& input, std::string feature) {
|
||||
char res = ' ';
|
||||
if (consume(input, std::move(feature))) {
|
||||
@@ -447,111 +493,353 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
return true;
|
||||
}
|
||||
|
||||
// This will be moved to COMGR eventually
|
||||
hipError_t CodeObject::ExtractCodeObjectFromFile(
|
||||
amd::Os::FileDesc fdesc, size_t fsize, const void** image,
|
||||
const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
if (!amd::Os::isValidFileDesc(fdesc)) {
|
||||
return hipErrorFileNotFound;
|
||||
size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) {
|
||||
if (isCompressed) {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleCompressedHeader*>(data);
|
||||
return obheader->totalSize;
|
||||
} else {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const __ClangOffloadBundleInfo* desc = &obheader->desc[0];
|
||||
uint64_t i = 0;
|
||||
while (++i < obheader->numOfCodeObjects) {
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize);
|
||||
}
|
||||
return desc->offset + desc->size;
|
||||
}
|
||||
|
||||
// Map the file to memory, with offset 0.
|
||||
// file will be unmapped in ModuleUnload
|
||||
// const void* image = nullptr;
|
||||
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
// retrieve code_objs{binary_image, binary_size} for devices
|
||||
return extractCodeObjectFromFatBinary(*image, device_names, code_objs);
|
||||
}
|
||||
|
||||
// This will be moved to COMGR eventually
|
||||
hipError_t CodeObject::ExtractCodeObjectFromMemory(
|
||||
const void* data, const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs, std::string& uri) {
|
||||
// Get the URI from memory
|
||||
if (!amd::Os::GetURIFromMemory(data, 0, uri)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return extractCodeObjectFromFatBinary(data, device_names, code_objs);
|
||||
}
|
||||
|
||||
// This will be moved to COMGR eventually
|
||||
/**
|
||||
* @brief Extract code object from fatbin using comgr
|
||||
*
|
||||
* @param[in] data the bundle data(fatbin or loaded module data)
|
||||
* @param[in] size the size of the bundle data
|
||||
* @param[in] agent_triple_target_ids isa names of concerned devices
|
||||
* @param[out] code_objs the buffer address and size pairs of extracted code objects of
|
||||
* concerned devices
|
||||
* Returned error code
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue, #hipErrorNoBinaryForGpu
|
||||
*
|
||||
* @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR
|
||||
*/
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
const void* data, const std::vector<std::string>& agent_triple_target_ids,
|
||||
const void* data, size_t size, const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
std::string magic((const char*)data, kOffloadBundleMagicStrSize);
|
||||
if (magic.compare(kOffloadBundleMagicStr)) {
|
||||
hipError_t hipStatus = hipSuccess;
|
||||
amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS;
|
||||
|
||||
const size_t num_devices = agent_triple_target_ids.size();
|
||||
size_t num_code_objs = num_devices;
|
||||
bool isCompressed = false;
|
||||
if (!IsClangOffloadMagicBundle(data, isCompressed)) {
|
||||
LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false", data);
|
||||
// hipModuleLoadData() will possibly call here
|
||||
return hipErrorInvalidKernelFile;
|
||||
}
|
||||
|
||||
// Initialize Code objects
|
||||
code_objs.reserve(agent_triple_target_ids.size());
|
||||
for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
|
||||
code_objs.push_back(std::make_pair(nullptr, 0));
|
||||
if (size == 0) size = getFatbinSize(data, isCompressed);
|
||||
|
||||
amd_comgr_data_t dataCodeObj{0};
|
||||
amd_comgr_data_set_t dataSetBundled{0};
|
||||
amd_comgr_data_set_t dataSetUnbundled{0};
|
||||
amd_comgr_action_info_t actionInfoUnbundle{0};
|
||||
amd_comgr_data_t item{0};
|
||||
|
||||
|
||||
std::set<std::string> devicesSet{}; // To make sure device is unique
|
||||
std::vector<const char*> bundleEntryIDs{};
|
||||
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
|
||||
for (size_t i = 0; i < num_devices; i++) {
|
||||
devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
|
||||
}
|
||||
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
size_t num_code_objs = code_objs.size();
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) +
|
||||
desc->bundleEntryIdSize)) {
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
const size_t image_size = desc->size;
|
||||
for (auto& device : devicesSet) {
|
||||
bundleEntryIDs.push_back(device.c_str());
|
||||
}
|
||||
|
||||
if (num_code_objs == 0) break;
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
do {
|
||||
// Create Bundled dataset
|
||||
comgrStatus = amd::Comgr::create_data_set(&dataSetBundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
std::string co_triple_target_id;
|
||||
if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue;
|
||||
// CodeObject
|
||||
comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
|
||||
if (code_objs[dev].first) continue;
|
||||
if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) {
|
||||
code_objs[dev] = std::make_pair(image, image_size);
|
||||
--num_code_objs;
|
||||
comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast<const char *>(data));
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::set_data_name("") failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
// Set up ActionInfo
|
||||
comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids(
|
||||
actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size());
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh",
|
||||
bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Unbundle
|
||||
comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle, dataSetBundled,
|
||||
dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check CodeObject count
|
||||
size_t count = 0;
|
||||
comgrStatus =
|
||||
amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Initialize Code objects
|
||||
code_objs.reserve(num_code_objs);
|
||||
for (size_t i = 0; i < num_code_objs; i++) {
|
||||
code_objs.push_back(std::make_pair(nullptr, 0));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
if (num_code_objs == 0) break;
|
||||
|
||||
size_t itemSize = 0;
|
||||
comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled,
|
||||
AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
std::string bundleEntryId(itemSize, 0);
|
||||
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data());
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count,
|
||||
itemSize, comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
// Remove bundleEntryId_
|
||||
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
|
||||
// This is behavour in comgr unbundling which is subject to change.
|
||||
// So just give info.
|
||||
LogPrintfInfo("bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(),
|
||||
kOffloadHipV4FatBinName_);
|
||||
}
|
||||
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
|
||||
|
||||
char* itemData = nullptr;
|
||||
for (size_t dev = 0; dev < num_devices; ++dev) {
|
||||
if (code_objs[dev].first) continue;
|
||||
//LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev,
|
||||
// agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
|
||||
|
||||
if (bundleEntryId == agent_triple_target_ids[dev]) {
|
||||
if (itemData == nullptr) {
|
||||
itemSize = 0;
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
if (itemSize == 0) {
|
||||
// If there isn't a code object for this device,
|
||||
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
|
||||
// valid name but no data. We need continue searching for other devices
|
||||
LogPrintfInfo(
|
||||
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s",
|
||||
dev, agent_triple_target_ids[dev].c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
// itemData should be deleted in fatbin's destructor
|
||||
itemData = new char[itemSize];
|
||||
if (itemData == nullptr) {
|
||||
LogError("no enough memory");
|
||||
hipStatus = hipErrorOutOfMemory;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count,
|
||||
itemSize, comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
delete []itemData;
|
||||
itemData = nullptr;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
|
||||
--num_code_objs;
|
||||
LogPrintfInfo(
|
||||
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), "
|
||||
"Size=%zu, num_code_objs=%zu",
|
||||
dev, agent_triple_target_ids[dev].c_str(), itemData,
|
||||
isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs);
|
||||
}
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::release_data(item);
|
||||
item.handle = 0;
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
if (hipStatus != hipSuccess) break;
|
||||
}
|
||||
} while(0);
|
||||
|
||||
if (hipStatus == hipSuccess && num_code_objs != 0) {
|
||||
hipStatus = hipErrorNoBinaryForGpu;
|
||||
|
||||
// Leave it for debug purpose in uncompressed mode.
|
||||
if (!isCompressed) {
|
||||
LogPrintfError("%s",
|
||||
"hipErrorNoBinaryForGpu: Unable to find code object for all current devices!");
|
||||
LogPrintfError("%s", " Devices:");
|
||||
for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
|
||||
LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(),
|
||||
((code_objs[i].first) ? "Found" : "Not Found"));
|
||||
}
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
LogPrintfError("%s", " Bundled Code Objects:");
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) +
|
||||
desc->bundleEntryIdSize)) {
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
|
||||
std::string co_triple_target_id;
|
||||
bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id);
|
||||
|
||||
if (valid_co) {
|
||||
LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(),
|
||||
co_triple_target_id.c_str());
|
||||
} else {
|
||||
LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_code_objs == 0) {
|
||||
return hipSuccess;
|
||||
} else {
|
||||
LogPrintfError("%s",
|
||||
"hipErrorNoBinaryForGpu: Unable to find code object for all current devices!");
|
||||
LogPrintfError("%s", " Devices:");
|
||||
for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
|
||||
LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(),
|
||||
((code_objs[i].first) ? "Found" : "Not Found"));
|
||||
}
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
LogPrintfError("%s", " Bundled Code Objects:");
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) +
|
||||
desc->bundleEntryIdSize)) {
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
|
||||
std::string co_triple_target_id;
|
||||
bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id);
|
||||
|
||||
if (valid_co) {
|
||||
LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(),
|
||||
co_triple_target_id.c_str());
|
||||
} else {
|
||||
LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str());
|
||||
}
|
||||
// Cleanup
|
||||
if (actionInfoUnbundle.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
return hipErrorNoBinaryForGpu;
|
||||
}
|
||||
if (dataSetBundled.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataSetUnbundled.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataCodeObj.handle) {
|
||||
comgrStatus = amd::Comgr::release_data(dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (item.handle) {
|
||||
comgrStatus = amd::Comgr::release_data(item);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
return hipStatus;
|
||||
}
|
||||
|
||||
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
|
||||
|
||||
@@ -48,31 +48,22 @@ class CodeObject {
|
||||
size_t binary_size);
|
||||
static hipError_t build_module(hipModule_t hmod, const std::vector<amd::Device*>& devices);
|
||||
|
||||
// Given an file desc and file size, extracts to code object for corresponding devices,
|
||||
// return code_objs{binary_ptr, binary_size}, which could be used to determine foffset
|
||||
static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize,
|
||||
const void ** image, const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
|
||||
// Given an ptr to memory, extracts to code object for corresponding devices,
|
||||
// returns code_objs{binary_ptr, binary_size} and uniform resource indicator
|
||||
static hipError_t ExtractCodeObjectFromMemory(const void* data,
|
||||
const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs,
|
||||
std::string& uri);
|
||||
|
||||
static uint64_t ElfSize(const void* emi);
|
||||
|
||||
static bool IsClangOffloadMagicBundle(const void* data);
|
||||
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
|
||||
|
||||
// Given an ptr to image or file, extracts to code object
|
||||
// for corresponding devices
|
||||
static hipError_t extractCodeObjectFromFatBinary(
|
||||
const void*, size_t, const std::vector<std::string>&,
|
||||
std::vector<std::pair<const void*, size_t>>&);
|
||||
|
||||
// Return size of fat bin
|
||||
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
|
||||
|
||||
protected:
|
||||
//Given an ptr to image or file, extracts to code object
|
||||
//for corresponding devices
|
||||
static hipError_t extractCodeObjectFromFatBinary(const void*,
|
||||
const std::vector<std::string>&,
|
||||
std::vector<std::pair<const void*, size_t>>&);
|
||||
|
||||
CodeObject() {}
|
||||
|
||||
private:
|
||||
friend const std::vector<hipModule_t>& modules();
|
||||
};
|
||||
|
||||
+69
-244
@@ -50,57 +50,46 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : fdesc_(amd:
|
||||
}
|
||||
|
||||
FatBinaryInfo::~FatBinaryInfo() {
|
||||
|
||||
// Different devices in the same model have the same binary_image_
|
||||
std::set<const void*> toDelete;
|
||||
// Release per device fat bin info.
|
||||
for (auto* fbd: fatbin_dev_info_) {
|
||||
if (fbd != nullptr) {
|
||||
if (fbd->binary_image_ && fbd->binary_offset_ == 0 && fbd->binary_image_ != image_) {
|
||||
// binary_image_ was allocated in CodeObject::extractCodeObjectFromFatBinary
|
||||
toDelete.insert(fbd->binary_image_);
|
||||
}
|
||||
delete fbd;
|
||||
}
|
||||
}
|
||||
|
||||
if (!HIP_USE_RUNTIME_UNBUNDLER) {
|
||||
// Using COMGR Unbundler
|
||||
if (ufd_ && amd::Os::isValidFileDesc(ufd_->fdesc_)) {
|
||||
// Check for ufd_ != nullptr, since sometimes, we never create unique_file_desc.
|
||||
if (ufd_->fsize_ && image_mapped_
|
||||
&& !amd::Os::MemoryUnmapFile(image_, ufd_->fsize_)) {
|
||||
LogPrintfError("Cannot unmap file for fdesc: %d fsize: %d", ufd_->fdesc_, ufd_->fsize_);
|
||||
assert(false);
|
||||
}
|
||||
if (!PlatformState::instance().CloseUniqueFileHandle(ufd_)) {
|
||||
LogPrintfError("Cannot close file for fdesc: %d", ufd_->fdesc_);
|
||||
assert(false);
|
||||
}
|
||||
for (auto itemData : toDelete) {
|
||||
LogPrintfInfo("~FatBinaryInfo(%p) will delete binary_image_ %p", this, itemData);
|
||||
delete[] reinterpret_cast<const char*>(itemData);
|
||||
}
|
||||
|
||||
// Using COMGR Unbundler
|
||||
if (ufd_ && amd::Os::isValidFileDesc(ufd_->fdesc_)) {
|
||||
// Check for ufd_ != nullptr, since sometimes, we never create unique_file_desc.
|
||||
if (ufd_->fsize_ && image_mapped_
|
||||
&& !amd::Os::MemoryUnmapFile(image_, ufd_->fsize_)) {
|
||||
LogPrintfError("Cannot unmap file for fdesc: %d fsize: %d", ufd_->fdesc_, ufd_->fsize_);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
fname_ = std::string();
|
||||
fdesc_ = amd::Os::FDescInit();
|
||||
fsize_ = 0;
|
||||
image_ = nullptr;
|
||||
uri_ = std::string();
|
||||
|
||||
if (0 == PlatformState::instance().UfdMapSize()) {
|
||||
LogError("All Unique FDs are closed");
|
||||
if (!PlatformState::instance().CloseUniqueFileHandle(ufd_)) {
|
||||
LogPrintfError("Cannot close file for fdesc: %d", ufd_->fdesc_);
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// Using Runtime Unbundler
|
||||
if (amd::Os::isValidFileDesc(fdesc_)) {
|
||||
if (fsize_ && !amd::Os::MemoryUnmapFile(image_, fsize_)) {
|
||||
LogPrintfError("Cannot unmap file for fdesc: %d fsize: %d", fdesc_, fsize_);
|
||||
assert(false);
|
||||
}
|
||||
if (!amd::Os::CloseFileHandle(fdesc_)) {
|
||||
LogPrintfError("Cannot close file for fdesc: %d", fdesc_);
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
fname_ = std::string();
|
||||
fdesc_ = amd::Os::FDescInit();
|
||||
fsize_ = 0;
|
||||
image_ = nullptr;
|
||||
uri_ = std::string();
|
||||
|
||||
fname_ = std::string();
|
||||
fdesc_ = amd::Os::FDescInit();
|
||||
fsize_ = 0;
|
||||
image_ = nullptr;
|
||||
uri_ = std::string();
|
||||
if (0 == PlatformState::instance().UfdMapSize()) {
|
||||
LogError("All Unique FDs are closed");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -114,11 +103,8 @@ void ListAllDeviceWithNoCOFromBundle(const std::unordered_map<std::string,
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices) {
|
||||
amd_comgr_data_t data_object {0};
|
||||
amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS;
|
||||
hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devices) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
// If image was passed as a pointer to our hipMod* api, we can try to extract the file name
|
||||
// if it was mapped by the app. Otherwise use the COMGR data API.
|
||||
if (fname_.size() == 0) {
|
||||
@@ -163,107 +149,52 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
fname_.c_str());
|
||||
|
||||
do {
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
// Copy device names
|
||||
std::vector<std::string> device_names;
|
||||
device_names.reserve(devices.size());
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
|
||||
}
|
||||
hip_status = CodeObject::extractCodeObjectFromFatBinary(
|
||||
image_, 0, device_names, code_objs);
|
||||
if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) {
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
if (code_objs[dev_idx].first) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]
|
||||
= new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
|
||||
|
||||
// If the image ptr is not clang offload bundle then just directly point the image.
|
||||
if (!CodeObject::IsClangOffloadMagicBundle(image_)) {
|
||||
for (size_t dev_idx=0; dev_idx < devices.size(); ++dev_idx) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]
|
||||
= new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0);
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_
|
||||
= new amd::Program(*devices[dev_idx]->asContext());
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_
|
||||
= new amd::Program(*devices[dev_idx]->asContext());
|
||||
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// This is the case of hipErrorNoBinaryForGpu which will finally fail app
|
||||
LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(),
|
||||
device_names[dev_idx].c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (hip_status == hipErrorInvalidKernelFile) {
|
||||
hip_status = hipSuccess;
|
||||
// If the image ptr is not clang offload bundle then just directly point the image.
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
|
||||
new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0);
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
|
||||
new amd::Program(*devices[dev_idx]->asContext());
|
||||
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) {
|
||||
hip_status = hipErrorOutOfMemory;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Create a data object, if it fails return error
|
||||
if ((comgr_status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object))
|
||||
!= AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Creating data object failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
#if !defined(_WIN32)
|
||||
// Using the file descriptor and file size, map the data object.
|
||||
if (amd::Os::isValidFileDesc(fdesc_)) {
|
||||
guarantee(fsize_ > 0, "Cannot have a file size of 0, fdesc: %d fname: %s",
|
||||
fdesc_, fname_.c_str());
|
||||
if ((comgr_status = amd_comgr_set_data_from_file_slice(data_object, fdesc_, foffset_,
|
||||
fsize_)) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
if (image_ != nullptr) {
|
||||
// Using the image ptr, map the data object.
|
||||
if ((comgr_status = amd_comgr_set_data(data_object, 4096,
|
||||
reinterpret_cast<const char*>(image_))) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
guarantee(false, "Cannot have both fname_ and image_ as nullptr");
|
||||
}
|
||||
|
||||
// Find the unique number of ISAs needed for this COMGR query.
|
||||
std::unordered_map<std::string, std::pair<size_t, size_t>> unique_isa_names;
|
||||
for (auto device : devices) {
|
||||
std::string device_name = device->devices()[0]->isa().isaName();
|
||||
unique_isa_names.insert({device_name, std::make_pair<size_t, size_t>(0,0)});
|
||||
}
|
||||
|
||||
// Create a query list using COMGR info for unique ISAs.
|
||||
std::vector<amd_comgr_code_object_info_t> query_list_array;
|
||||
query_list_array.reserve(unique_isa_names.size());
|
||||
for (const auto &isa_name : unique_isa_names) {
|
||||
auto &item = query_list_array.emplace_back();
|
||||
item.isa = isa_name.first.c_str();
|
||||
item.size = 0;
|
||||
item.offset = 0;
|
||||
}
|
||||
|
||||
// Look up the code object info passing the query list.
|
||||
if ((comgr_status = amd_comgr_lookup_code_object(data_object, query_list_array.data(),
|
||||
unique_isa_names.size())) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
for (const auto &item : query_list_array) {
|
||||
auto unique_it = unique_isa_names.find(item.isa);
|
||||
guarantee(unique_isa_names.cend() != unique_it, "Cannot find unique isa ");
|
||||
unique_it->second = std::pair<size_t, size_t>
|
||||
(static_cast<size_t>(item.size),
|
||||
static_cast<size_t>(item.offset));
|
||||
}
|
||||
|
||||
for (auto device : devices) {
|
||||
std::string device_name = device->devices()[0]->isa().isaName();
|
||||
auto dev_it = unique_isa_names.find(device_name);
|
||||
// If the size is 0, then COMGR API could not find the CO for this GPU device/ISA
|
||||
if (dev_it->second.first == 0) {
|
||||
LogPrintfError("Cannot find CO in the bundle %s for ISA: %s",
|
||||
fname_.c_str(), device_name.c_str());
|
||||
hip_status = hipErrorNoBinaryForGpu;
|
||||
ListAllDeviceWithNoCOFromBundle(unique_isa_names);
|
||||
break;
|
||||
}
|
||||
guarantee(unique_isa_names.cend() != dev_it,
|
||||
"Cannot find the device name in the unique device name");
|
||||
fatbin_dev_info_[device->deviceId()]
|
||||
= new FatBinaryDeviceInfo(reinterpret_cast<address>(const_cast<void*>(image_))
|
||||
+ dev_it->second.second, dev_it->second.first,
|
||||
dev_it->second.second);
|
||||
fatbin_dev_info_[device->deviceId()]->program_
|
||||
= new amd::Program(*(device->asContext()));
|
||||
else {
|
||||
LogPrintfError(
|
||||
"CodeObject::extractCodeObjectFromFatBinary failed with status %d\n",
|
||||
hip_status);
|
||||
}
|
||||
} while(0);
|
||||
|
||||
@@ -286,115 +217,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
fsize_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (data_object.handle) {
|
||||
if ((comgr_status = amd_comgr_release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devices) {
|
||||
if (!HIP_USE_RUNTIME_UNBUNDLER) {
|
||||
return ExtractFatBinaryUsingCOMGR(devices);
|
||||
}
|
||||
|
||||
hipError_t hip_error = hipSuccess;
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
|
||||
// Copy device names for Extract Code object File
|
||||
std::vector<std::string> device_names;
|
||||
device_names.reserve(devices.size());
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
|
||||
}
|
||||
|
||||
// We are given file name, get the file desc and file size
|
||||
if (fname_.size() > 0) {
|
||||
// Get File Handle & size of the file.
|
||||
if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) {
|
||||
return hipErrorFileNotFound;
|
||||
}
|
||||
if (fsize_ == 0) {
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
|
||||
// Extract the code object from file
|
||||
hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_,
|
||||
device_names, code_objs);
|
||||
|
||||
} else if (image_ != nullptr) {
|
||||
// We are directly given image pointer directly, try to extract file desc & file Size
|
||||
hip_error = CodeObject::ExtractCodeObjectFromMemory(image_,
|
||||
device_names, code_objs, uri_);
|
||||
} else {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
if (hip_error == hipErrorNoBinaryForGpu) {
|
||||
if (fname_.size() > 0) {
|
||||
LogPrintfError("hipErrorNoBinaryForGpu: Couldn't find binary for file: %s", fname_.c_str());
|
||||
} else {
|
||||
LogPrintfError("hipErrorNoBinaryForGpu: Couldn't find binary for ptr: 0x%x", image_);
|
||||
}
|
||||
|
||||
// For the condition: unable to find code object for all devices,
|
||||
// still extract available images to those devices owning them.
|
||||
// This helps users to work with ROCm if there is any supported
|
||||
// GFX on system.
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
if (code_objs[dev_idx].first) {
|
||||
// Calculate the offset wrt binary_image and the original image
|
||||
size_t offset_l
|
||||
= (reinterpret_cast<address>(const_cast<void*>(code_objs[dev_idx].first))
|
||||
- reinterpret_cast<address>(const_cast<void*>(image_)));
|
||||
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]
|
||||
= new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l);
|
||||
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_
|
||||
= new amd::Program(*devices[dev_idx]->asContext());
|
||||
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hip_error;
|
||||
}
|
||||
|
||||
if (hip_error == hipErrorInvalidKernelFile) {
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
// the image type is no CLANG_OFFLOAD_BUNDLER, image for current device directly passed
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]
|
||||
= new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0);
|
||||
}
|
||||
} else if(hip_error == hipSuccess) {
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
// Calculate the offset wrt binary_image and the original image
|
||||
size_t offset_l
|
||||
= (reinterpret_cast<address>(const_cast<void*>(code_objs[dev_idx].first))
|
||||
- reinterpret_cast<address>(const_cast<void*>(image_)));
|
||||
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]
|
||||
= new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_
|
||||
= new amd::Program(*devices[dev_idx]->asContext());
|
||||
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::AddDevProgram(const int device_id) {
|
||||
// Device Id bounds Check
|
||||
DeviceIdCheck(device_id);
|
||||
|
||||
@@ -64,7 +64,6 @@ public:
|
||||
~FatBinaryInfo();
|
||||
|
||||
// Loads Fat binary from file or image, unbundles COs for devices.
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
|
||||
hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices);
|
||||
hipError_t AddDevProgram(const int device_id);
|
||||
hipError_t BuildProgram(const int device_id);
|
||||
|
||||
@@ -66,10 +66,6 @@ hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned
|
||||
HIP_RETURN(PlatformState::instance().loadModule(module, 0, image));
|
||||
}
|
||||
|
||||
extern hipError_t __hipExtractCodeObjectFromFatBinary(
|
||||
const void* data, const std::vector<std::string>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
|
||||
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
|
||||
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
|
||||
|
||||
|
||||
@@ -441,39 +441,6 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool UnbundleBitCode(const std::vector<char>& bundled_llvm_bitcode, const std::string& isa,
|
||||
size_t& co_offset, size_t& co_size) {
|
||||
std::string magic(bundled_llvm_bitcode.begin(),
|
||||
bundled_llvm_bitcode.begin() + bundle_magic_string_size);
|
||||
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
|
||||
// Handle case where the whole file is unbundled
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string bundled_llvm_bitcode_s(bundled_llvm_bitcode.begin(),
|
||||
bundled_llvm_bitcode.begin() + bundled_llvm_bitcode.size());
|
||||
const void* data = reinterpret_cast<const void*>(bundled_llvm_bitcode_s.c_str());
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
for (uint64_t idx = 0; idx < obheader->numOfCodeObjects; ++idx,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) +
|
||||
desc->bundleEntryIdSize)) {
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
const size_t image_size = desc->size;
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
|
||||
// Check if the device id and code object id are compatible
|
||||
if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa)) {
|
||||
co_offset = (reinterpret_cast<uintptr_t>(image) - reinterpret_cast<uintptr_t>(data));
|
||||
co_size = image_size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool addCodeObjData(amd_comgr_data_set_t& input, const std::vector<char>& source,
|
||||
const std::string& name, const amd_comgr_data_kind_t type) {
|
||||
amd_comgr_data_t data;
|
||||
|
||||
@@ -31,8 +31,6 @@ THE SOFTWARE.
|
||||
|
||||
namespace hiprtc {
|
||||
namespace helpers {
|
||||
bool UnbundleBitCode(const std::vector<char>& bundled_bit_code, const std::string& isa,
|
||||
size_t& co_offset, size_t& co_size);
|
||||
bool addCodeObjData(amd_comgr_data_set_t& input, const std::vector<char>& source,
|
||||
const std::string& name, const amd_comgr_data_kind_t type);
|
||||
bool extractBuildLog(amd_comgr_data_set_t dataSet, std::string& buildLog);
|
||||
|
||||
@@ -545,8 +545,7 @@ amd_comgr_data_kind_t RTCLinkProgram::GetCOMGRDataKind(hiprtcJITInputType input_
|
||||
data_kind = AMD_COMGR_DATA_KIND_BC;
|
||||
break;
|
||||
case HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE:
|
||||
data_kind =
|
||||
HIPRTC_USE_RUNTIME_UNBUNDLER ? AMD_COMGR_DATA_KIND_BC : AMD_COMGR_DATA_KIND_BC_BUNDLE;
|
||||
data_kind = AMD_COMGR_DATA_KIND_BC_BUNDLE;
|
||||
break;
|
||||
case HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE:
|
||||
data_kind = AMD_COMGR_DATA_KIND_AR_BUNDLE;
|
||||
@@ -561,32 +560,13 @@ amd_comgr_data_kind_t RTCLinkProgram::GetCOMGRDataKind(hiprtcJITInputType input_
|
||||
|
||||
bool RTCLinkProgram::AddLinkerDataImpl(std::vector<char>& link_data, hiprtcJITInputType input_type,
|
||||
std::string& link_file_name) {
|
||||
std::vector<char> llvm_bitcode;
|
||||
// If this is bundled bitcode then unbundle this.
|
||||
if (HIPRTC_USE_RUNTIME_UNBUNDLER && input_type == HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE) {
|
||||
if (!findIsa()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t co_offset = 0;
|
||||
size_t co_size = 0;
|
||||
if (!UnbundleBitCode(link_data, isa_, co_offset, co_size)) {
|
||||
LogError("Error in hiprtc: unable to unbundle the llvm bitcode");
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm_bitcode.assign(link_data.begin() + co_offset, link_data.begin() + co_offset + co_size);
|
||||
} else {
|
||||
llvm_bitcode.assign(link_data.begin(), link_data.end());
|
||||
}
|
||||
|
||||
amd_comgr_data_kind_t data_kind;
|
||||
if ((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) {
|
||||
LogError("Cannot find the correct COMGR data kind");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!addCodeObjData(link_input_, llvm_bitcode, link_file_name, data_kind)) {
|
||||
if (!addCodeObjData(link_input_, link_data, link_file_name, data_kind)) {
|
||||
LogError("Error in hiprtc: unable to add linked code object");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -119,6 +119,7 @@ bool Comgr::LoadLib(bool is_versioned) {
|
||||
GET_COMGR_SYMBOL(amd_comgr_get_mangled_name)
|
||||
GET_COMGR_SYMBOL(amd_comgr_populate_name_expression_map)
|
||||
GET_COMGR_SYMBOL(amd_comgr_map_name_expression_to_symbol_name)
|
||||
GET_COMGR_SYMBOL(amd_comgr_action_info_set_bundle_entry_ids)
|
||||
is_ready_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -76,6 +76,7 @@ typedef amd_comgr_status_t (*t_amd_comgr_populate_mangled_names)(amd_comgr_data_
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_get_mangled_name)(amd_comgr_data_t data, size_t index, size_t *size, char *mangled_name);
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_populate_name_expression_map)(amd_comgr_data_t data, size_t *count);
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_map_name_expression_to_symbol_name)(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name);
|
||||
typedef amd_comgr_status_t (*t_amd_comgr_action_info_set_bundle_entry_ids)(amd_comgr_action_info_t action_info, const char* bundle_entry_ids[], size_t count);
|
||||
|
||||
struct ComgrEntryPoints {
|
||||
void* handle;
|
||||
@@ -129,6 +130,7 @@ struct ComgrEntryPoints {
|
||||
t_amd_comgr_get_mangled_name amd_comgr_get_mangled_name;
|
||||
t_amd_comgr_populate_name_expression_map amd_comgr_populate_name_expression_map;
|
||||
t_amd_comgr_map_name_expression_to_symbol_name amd_comgr_map_name_expression_to_symbol_name;
|
||||
t_amd_comgr_action_info_set_bundle_entry_ids amd_comgr_action_info_set_bundle_entry_ids;
|
||||
};
|
||||
|
||||
#ifdef COMGR_DYN_DLL
|
||||
@@ -310,7 +312,11 @@ public:
|
||||
static amd_comgr_status_t map_name_expression_to_symbol_name(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name) {
|
||||
return COMGR_DYN(amd_comgr_map_name_expression_to_symbol_name)(data, size, name_expression, symbol_name);
|
||||
}
|
||||
|
||||
static amd_comgr_status_t action_info_set_bundle_entry_ids(amd_comgr_action_info_t action_info,
|
||||
const char* bundle_entry_ids[], size_t count) {
|
||||
return COMGR_DYN(amd_comgr_action_info_set_bundle_entry_ids)(action_info, bundle_entry_ids,
|
||||
count);
|
||||
}
|
||||
|
||||
private:
|
||||
static ComgrEntryPoints cep_;
|
||||
|
||||
@@ -229,10 +229,6 @@ release(bool, ROC_SKIP_KERNEL_ARG_COPY, false, \
|
||||
"If true, then runtime can skip kernel arg copy") \
|
||||
release(bool, GPU_STREAMOPS_CP_WAIT, false, \
|
||||
"Force the stream wait memory operation to wait on CP.") \
|
||||
release(bool, HIP_USE_RUNTIME_UNBUNDLER, false, \
|
||||
"Force this to use Runtime code object unbundler.") \
|
||||
release(bool, HIPRTC_USE_RUNTIME_UNBUNDLER, false, \
|
||||
"Set this to true to force runtime unbundler in hiprtc.") \
|
||||
release(size_t, HIP_INITIAL_DM_SIZE, 8 * Mi, \
|
||||
"Set initial heap size for device malloc.") \
|
||||
release(bool, HIP_FORCE_DEV_KERNARG, true, \
|
||||
|
||||
Reference in New Issue
Block a user