SWDEV-540645 - use single path for code object loading (#773)
* use single path for code object loading
* Remove printf
* Address review comments
* Fix the declaration of ihipMallocManaged
[ROCm/clr commit: d7f90a3120]
Este commit está contenido en:
cometido por
GitHub
padre
0e0ca48a6a
commit
7fe61f9556
@@ -35,584 +35,6 @@ namespace hip {
|
||||
hipError_t ihipFree(void* ptr);
|
||||
// forward declaration of methods required for managed variables
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, size_t align = 0, bool use_host_ptr = 0);
|
||||
namespace {
|
||||
// In uncompressed mode
|
||||
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
|
||||
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleUncompressedMagicStr);
|
||||
|
||||
// In compressed mode
|
||||
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
|
||||
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleCompressedMagicStr);
|
||||
|
||||
constexpr char kOffloadKindHip[] = "hip";
|
||||
constexpr char kOffloadKindHipv4[] = "hipv4";
|
||||
constexpr char kOffloadKindHcc[] = "hcc";
|
||||
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
|
||||
constexpr char kHipFatBinName[] = "hipfatbin";
|
||||
constexpr char kHipFatBinName_[] = "hipfatbin-";
|
||||
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
|
||||
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
|
||||
|
||||
// Clang Offload bundler description & Header in uncompressed mode.
|
||||
struct __ClangOffloadBundleInfo {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
uint64_t bundleEntryIdSize;
|
||||
const char bundleEntryId[1];
|
||||
};
|
||||
|
||||
struct __ClangOffloadBundleUncompressedHeader {
|
||||
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
|
||||
uint64_t numOfCodeObjects;
|
||||
__ClangOffloadBundleInfo desc[1];
|
||||
};
|
||||
|
||||
// Clang Offload bundler description & Header in compressed mode.
|
||||
struct __ClangOffloadBundleCompressedHeader {
|
||||
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
|
||||
uint16_t versionNumber;
|
||||
uint16_t compressionMethod;
|
||||
uint32_t totalSize;
|
||||
uint32_t uncompressedBinarySize;
|
||||
uint64_t Hash;
|
||||
const char compressedBinarydesc[1];
|
||||
};
|
||||
} // namespace
|
||||
|
||||
bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) {
|
||||
std::string magic(reinterpret_cast<const char*>(data),
|
||||
kOffloadBundleUncompressedMagicStrSize - 1);
|
||||
if (!magic.compare(kOffloadBundleUncompressedMagicStr)) {
|
||||
isCompressed = false;
|
||||
return true;
|
||||
}
|
||||
std::string magic1(reinterpret_cast<const char*>(data),
|
||||
kOffloadBundleCompressedMagicStrSize - 1);
|
||||
if (!magic1.compare(kOffloadBundleCompressedMagicStr)) {
|
||||
isCompressed = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t CodeObject::getGenericVersion(const void* image) {
|
||||
const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image);
|
||||
return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA &&
|
||||
ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ?
|
||||
((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) : 0;
|
||||
}
|
||||
|
||||
bool CodeObject::isGenericTarget(const void* image) {
|
||||
return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN;
|
||||
}
|
||||
|
||||
bool CodeObject::containGenericTarget(const void *data) {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize)) {
|
||||
if (desc->size == 0) continue;
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
if (isGenericTarget(image)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
|
||||
|
||||
// Consumes the string 'consume_' from the starting of the given input
|
||||
// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa--
|
||||
// input will become gfx908.
|
||||
static bool consume(std::string& input, std::string consume_) {
|
||||
if (input.substr(0, consume_.size()) != consume_) {
|
||||
return false;
|
||||
}
|
||||
input = input.substr(consume_.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Trim String till character, will be used to get gpuname
|
||||
// example: input is gfx908:sram-ecc+ and trim char is :
|
||||
// input will become :sram-ecc+.
|
||||
static std::string trimName(std::string& input, char trim) {
|
||||
auto pos_ = input.find(trim);
|
||||
auto res = input;
|
||||
if (pos_ == std::string::npos) {
|
||||
input = "";
|
||||
} else {
|
||||
res = input.substr(0, pos_);
|
||||
input = input.substr(pos_);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// Trim String till character, will be used to get bundle entry ID.
|
||||
// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is .
|
||||
// input will become amdgcn-amd-amdhsa--gfx1035
|
||||
static bool trimNameTail(std::string& input, char trim) {
|
||||
auto pos_ = input.rfind(trim);
|
||||
if (pos_ == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
input = input.substr(0, pos_);
|
||||
return true;
|
||||
}
|
||||
|
||||
static char getFeatureValue(std::string& input, std::string feature) {
|
||||
char res = ' ';
|
||||
if (consume(input, std::move(feature))) {
|
||||
res = input[0];
|
||||
input = input.substr(1);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value,
|
||||
char& xnack_value) {
|
||||
processor = trimName(input, ':');
|
||||
sramecc_value = getFeatureValue(input, std::string(":sramecc"));
|
||||
if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false;
|
||||
xnack_value = getFeatureValue(input, std::string(":xnack"));
|
||||
if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
std::string agent_triple_target_id, unsigned int genericVersion) {
|
||||
// Primitive Check
|
||||
if (co_triple_target_id == agent_triple_target_id) return true;
|
||||
|
||||
// Parse code object triple target id
|
||||
if (!consume(co_triple_target_id, std::string(kAmdgcnTargetTriple) + '-')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string co_processor;
|
||||
char co_sram_ecc, co_xnack;
|
||||
if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!co_triple_target_id.empty()) return false;
|
||||
|
||||
// Parse agent isa triple target id
|
||||
if (!consume(agent_triple_target_id, std::string(kAmdgcnTargetTriple) + '-')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string agent_isa_processor;
|
||||
char isa_sram_ecc, isa_xnack;
|
||||
if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!agent_triple_target_id.empty()) return false;
|
||||
|
||||
// Check for compatibility
|
||||
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
// co_processor is generic target
|
||||
if (!helpers::IsCompatibleWithGenericTarget(co_processor, agent_isa_processor))
|
||||
return false;
|
||||
} else if (agent_isa_processor != co_processor) {
|
||||
return false;
|
||||
}
|
||||
if (co_sram_ecc != ' ') {
|
||||
if (co_sram_ecc != isa_sram_ecc) return false;
|
||||
}
|
||||
if (co_xnack != ' ') {
|
||||
if (co_xnack != isa_xnack) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CodeObject::QueryGenericTarget(std::string agentTarget, std::string& processor,
|
||||
char& sram_ecc, char& xnack) {
|
||||
static const std::string head = std::string(kAmdgcnTargetTriple) + '-';
|
||||
// Parse agent isa triple target id
|
||||
if (!consume(agentTarget, head)) {
|
||||
return false;
|
||||
}
|
||||
if (!getTargetIDValue(agentTarget, processor, sram_ecc, xnack)) {
|
||||
return false;
|
||||
}
|
||||
if (processor.empty()) return false;
|
||||
auto &map = helpers::GenericTargetMapping();
|
||||
auto search = map.find(processor);
|
||||
if (search == map.end()) return false;
|
||||
processor = head + search->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) {
|
||||
if (isCompressed) {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleCompressedHeader*>(data);
|
||||
return obheader->totalSize;
|
||||
} else {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const __ClangOffloadBundleInfo* desc = &obheader->desc[0];
|
||||
uint64_t i = 0;
|
||||
while (++i < obheader->numOfCodeObjects) {
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize);
|
||||
}
|
||||
return desc->offset + desc->size;
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
const void* data, size_t size, const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
hipError_t hipStatus = hipSuccess;
|
||||
amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS;
|
||||
|
||||
const size_t num_devices = agent_triple_target_ids.size();
|
||||
size_t num_code_objs = num_devices;
|
||||
bool isCompressed = false;
|
||||
if (!IsClangOffloadMagicBundle(data, isCompressed)) {
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_COMGR, "IsClangOffloadMagicBundle(%p) return false", data);
|
||||
// hipModuleLoadData() will possibly call here
|
||||
return hipErrorInvalidKernelFile;
|
||||
}
|
||||
|
||||
if (size == 0) size = getFatbinSize(data, isCompressed);
|
||||
|
||||
amd_comgr_data_t dataCodeObj{0};
|
||||
amd_comgr_data_set_t dataSetBundled{0};
|
||||
amd_comgr_data_set_t dataSetUnbundled{0};
|
||||
amd_comgr_action_info_t actionInfoUnbundle{0};
|
||||
amd_comgr_data_t item{0};
|
||||
|
||||
|
||||
std::set<std::string> devicesSet{}; // To make sure device is unique
|
||||
std::set<std::string> genericDevicesSet{}; // Used to record generic targets
|
||||
|
||||
std::vector<const char*> bundleEntryIDs{};
|
||||
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
|
||||
for (size_t i = 0; i < num_devices; i++) {
|
||||
auto res = devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
|
||||
if (res.second) {
|
||||
// This is a new device in devicesSet
|
||||
bundleEntryIDs.push_back(res.first->c_str());
|
||||
std::string processor;
|
||||
char sram_ecc = ' ', xnack = ' ';
|
||||
if (!QueryGenericTarget(agent_triple_target_ids[i], processor, sram_ecc, xnack)) {
|
||||
continue; // No generic target for this device
|
||||
}
|
||||
// Now processor is generic such as
|
||||
// amdgcn-amd-amdhsa--gfx9-4-generic, amdgcn-amd-amdhsa--gfx11-generic
|
||||
processor = hipv4 + processor;
|
||||
auto ret = genericDevicesSet.insert(processor);
|
||||
if (ret.second) {
|
||||
// Without feature
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
if (xnack != ' ') {
|
||||
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
|
||||
if (ret.second) {
|
||||
// Generic target with xnack feature
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
}
|
||||
if (sram_ecc != ' ') {
|
||||
processor += ":sramecc";
|
||||
processor += sram_ecc;
|
||||
ret = genericDevicesSet.insert(processor);
|
||||
if (ret.second) {
|
||||
// Generic target with sramecc feature
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
if (xnack != ' ') {
|
||||
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
|
||||
if (ret.second) {
|
||||
// Generic target with sramecc and xnack features
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
// Create Bundled dataset
|
||||
comgrStatus = amd::Comgr::create_data_set(&dataSetBundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// CodeObject
|
||||
comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast<const char*>(data));
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::set_data_name("
|
||||
") failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
// Set up ActionInfo
|
||||
comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids(
|
||||
actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size());
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh",
|
||||
bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Unbundle
|
||||
comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle,
|
||||
dataSetBundled, dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check CodeObject count
|
||||
size_t count = 0;
|
||||
comgrStatus =
|
||||
amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
// Initialize Code objects
|
||||
code_objs.reserve(num_code_objs);
|
||||
for (size_t i = 0; i < num_code_objs; i++) {
|
||||
code_objs.push_back(std::make_pair(nullptr, 0));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
if (num_code_objs == 0) break;
|
||||
|
||||
size_t itemSize = 0;
|
||||
comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled,
|
||||
AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
std::string bundleEntryId(itemSize, 0);
|
||||
comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data());
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count,
|
||||
itemSize, comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "Found bundleEntryId=%s", bundleEntryId.c_str());
|
||||
|
||||
// Remove bundleEntryId_
|
||||
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
|
||||
// This is behavour in comgr unbundling which is subject to change.
|
||||
// So just give info.
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
|
||||
"bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(),
|
||||
kOffloadHipV4FatBinName_);
|
||||
}
|
||||
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
|
||||
|
||||
// Currently we only support EF_AMDGPU_GENERIC_VERSION_MIN on generic target
|
||||
uint32_t genericVersion =
|
||||
bundleEntryId.find("generic") != bundleEntryId.npos ? EF_AMDGPU_GENERIC_VERSION_MIN : 0;
|
||||
char* itemData = nullptr;
|
||||
for (size_t dev = 0; dev < num_devices; ++dev) {
|
||||
if (code_objs[dev].first != nullptr) {
|
||||
if (!isGenericTarget(code_objs[dev].first)) {
|
||||
continue; // Specific target already found
|
||||
} else if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
continue; // Generic target already found, no need to check another generic
|
||||
}
|
||||
}
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "agent_triple_target_ids[%zu]=%s, bundleEntryId=%s",
|
||||
dev, agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
|
||||
if (isCodeObjectCompatibleWithDevice(bundleEntryId, agent_triple_target_ids[dev],
|
||||
genericVersion)) {
|
||||
if (itemData == nullptr) {
|
||||
itemSize = 0;
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count,
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
if (itemSize == 0) {
|
||||
// If there isn't a code object for this device,
|
||||
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
|
||||
// valid name but no data. We need continue searching for other devices
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
|
||||
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev,
|
||||
agent_triple_target_ids[dev].c_str());
|
||||
break;
|
||||
}
|
||||
// itemData should be deleted in fatbin's destructor
|
||||
itemData = new char[itemSize];
|
||||
if (itemData == nullptr) {
|
||||
LogError("no enough memory");
|
||||
hipStatus = hipErrorOutOfMemory;
|
||||
break;
|
||||
}
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count,
|
||||
itemSize, comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
delete[] itemData;
|
||||
itemData = nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (code_objs[dev].first != nullptr) {
|
||||
// This must be data of generic target
|
||||
bool used = false; // Still used by other devices?
|
||||
for (size_t i = 0; i < num_devices; ++i) {
|
||||
if (dev != i && code_objs[dev].first == code_objs[i].first) {
|
||||
used = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!used) {
|
||||
delete[] reinterpret_cast<const char*>(code_objs[dev].first);
|
||||
}
|
||||
} else {
|
||||
--num_code_objs;
|
||||
}
|
||||
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR,
|
||||
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s, %s), "
|
||||
"Size=%zu, num_code_objs=%zu",
|
||||
dev, agent_triple_target_ids[dev].c_str(), itemData,
|
||||
isCompressed ? "compressed" : "uncompressed",
|
||||
genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN ? "generic" : "non-generic",
|
||||
itemSize, num_code_objs);
|
||||
}
|
||||
}
|
||||
|
||||
comgrStatus = amd::Comgr::release_data(item);
|
||||
item.handle = 0;
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
if (hipStatus != hipSuccess) break;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// Cleanup
|
||||
if (actionInfoUnbundle.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
if (dataSetBundled.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataSetUnbundled.handle) {
|
||||
comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh",
|
||||
comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataCodeObj.handle) {
|
||||
comgrStatus = amd::Comgr::release_data(dataCodeObj);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (item.handle) {
|
||||
comgrStatus = amd::Comgr::release_data(item);
|
||||
if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus);
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
return hipStatus;
|
||||
}
|
||||
|
||||
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
|
||||
amd::ScopedLock lock(dclock_);
|
||||
@@ -620,7 +42,7 @@ hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
|
||||
// Number of devices = 1 in dynamic code object
|
||||
fb_info_ = new FatBinaryInfo(fname, image);
|
||||
std::vector<hip::Device*> devices = {g_devices[ihipGetDevice()]};
|
||||
IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices));
|
||||
IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinaryUsingCOMGR(devices));
|
||||
|
||||
// No Lazy loading for DynCO
|
||||
IHIP_RETURN_ONFAIL(fb_info_->BuildProgram(ihipGetDevice()));
|
||||
@@ -834,7 +256,7 @@ hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) {
|
||||
|
||||
// Create a new fat binary object and extract the fat binary for all devices.
|
||||
FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo(nullptr, data);
|
||||
hipError_t err = fatBinaryInfo->ExtractFatBinary(g_devices);
|
||||
hipError_t err = fatBinaryInfo->ExtractFatBinaryUsingCOMGR(g_devices);
|
||||
programs = fatBinaryInfo;
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -35,6 +35,52 @@ THE SOFTWARE.
|
||||
#include "platform/program.hpp"
|
||||
|
||||
namespace hip {
|
||||
namespace symbols {
|
||||
// In uncompressed mode
|
||||
constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__";
|
||||
static constexpr size_t kOffloadBundleUncompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleUncompressedMagicStr);
|
||||
|
||||
// In compressed mode
|
||||
constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB";
|
||||
static constexpr size_t kOffloadBundleCompressedMagicStrSize =
|
||||
sizeof(kOffloadBundleCompressedMagicStr);
|
||||
|
||||
constexpr char kOffloadKindHip[] = "hip";
|
||||
constexpr char kOffloadKindHipv4[] = "hipv4";
|
||||
constexpr char kOffloadKindHcc[] = "hcc";
|
||||
constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-";
|
||||
constexpr char kHipFatBinName[] = "hipfatbin";
|
||||
constexpr char kHipFatBinName_[] = "hipfatbin-";
|
||||
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
|
||||
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
|
||||
|
||||
// Clang Offload bundler description & Header in uncompressed mode.
|
||||
struct ClangOffloadBundleInfo {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
uint64_t bundleEntryIdSize;
|
||||
const char bundleEntryId[1];
|
||||
};
|
||||
|
||||
struct ClangOffloadBundleUncompressedHeader {
|
||||
const char magic[kOffloadBundleUncompressedMagicStrSize - 1];
|
||||
uint64_t numOfCodeObjects;
|
||||
ClangOffloadBundleInfo desc[1];
|
||||
};
|
||||
|
||||
// Clang Offload bundler description & Header in compressed mode.
|
||||
struct ClangOffloadBundleCompressedHeader {
|
||||
const char magic[kOffloadBundleCompressedMagicStrSize - 1];
|
||||
uint16_t versionNumber;
|
||||
uint16_t compressionMethod;
|
||||
uint32_t totalSize;
|
||||
uint32_t uncompressedBinarySize;
|
||||
uint64_t Hash;
|
||||
const char compressedBinarydesc[1];
|
||||
};
|
||||
} // namespace symbols
|
||||
|
||||
//Forward Declaration for friend usage
|
||||
class PlatformState;
|
||||
|
||||
@@ -43,50 +89,6 @@ class CodeObject {
|
||||
public:
|
||||
virtual ~CodeObject() {}
|
||||
|
||||
// Functions to add_dev_prog and build
|
||||
static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr,
|
||||
size_t binary_size);
|
||||
static hipError_t build_module(hipModule_t hmod, const std::vector<amd::Device*>& devices);
|
||||
|
||||
static uint64_t ElfSize(const void* emi);
|
||||
|
||||
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
|
||||
|
||||
static uint32_t getGenericVersion(const void* image);
|
||||
|
||||
static bool isGenericTarget(const void* image);
|
||||
|
||||
static bool containGenericTarget(const void *data);
|
||||
|
||||
// Return size of fat bin
|
||||
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
|
||||
|
||||
/**
|
||||
* @brief Extract code object from fatbin using comgr unbundling action
|
||||
*
|
||||
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
|
||||
* compressed and even SPIR-V(to be supported later) mode.
|
||||
* @param[in] size the size of the bundle data
|
||||
* @param[in] agent_triple_target_ids isa names of concerned devices
|
||||
* @param[out] code_objs the buffer address and size pairs of extracted code objects of
|
||||
* concerned devices
|
||||
* Returned error code
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue,
|
||||
* #hipErrorNoBinaryForGpu
|
||||
*
|
||||
* @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
|
||||
* const std::vector<hip::Device*>& devices)
|
||||
*/
|
||||
static hipError_t extractCodeObjectFromFatBinaryUsingComgr(
|
||||
const void* data, size_t size, const std::vector<std::string>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
|
||||
// Query the generic target of agent target.
|
||||
// Return true on successfull query, false on failure
|
||||
static bool QueryGenericTarget(std::string agentTarget, std::string& processor, char& sram_ecc,
|
||||
char& xnack);
|
||||
|
||||
protected:
|
||||
CodeObject() {}
|
||||
|
||||
@@ -185,6 +187,6 @@ private:
|
||||
std::unordered_map<int, bool> managedVarsDevicePtrInitalized_;
|
||||
};
|
||||
|
||||
}; // namespace hip
|
||||
}; // namespace hip
|
||||
|
||||
#endif /* HIP_CODE_OBJECT_HPP */
|
||||
|
||||
@@ -21,6 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
#define LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING 1
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <io.h>
|
||||
#if defined(__has_attribute)
|
||||
@@ -32,7 +33,6 @@ THE SOFTWARE.
|
||||
using namespace llvm::ELF;
|
||||
|
||||
namespace hip {
|
||||
|
||||
std::unordered_set<LinkProgram*> LinkProgram::linker_set_;
|
||||
|
||||
namespace helpers {
|
||||
@@ -64,239 +64,6 @@ struct __ClangOffloadBundleHeader {
|
||||
__ClangOffloadBundleInfo desc[1];
|
||||
};
|
||||
|
||||
static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported,
|
||||
bool& sramEccSupported) {
|
||||
switch (EFlags & EF_AMDGPU_MACH) {
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX700:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx700";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX701:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx701";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX702:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx702";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX703:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx703";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX704:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx704";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX705:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx705";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX801:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx801";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX802:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx802";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX803:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx803";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX805:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx805";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX810:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx810";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX900:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx900";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX902:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx902";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX904:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx904";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX906:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = true;
|
||||
proc_name = "gfx906";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX908:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = true;
|
||||
proc_name = "gfx908";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX909:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx909";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90A:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = true;
|
||||
proc_name = "gfx90a";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90C:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx90c";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX942:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = true;
|
||||
proc_name = "gfx942";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX950:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = true;
|
||||
proc_name = "gfx950";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1010:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1010";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1011:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1011";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1012:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1012";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1013:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1013";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1030";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1031:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1031";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1032:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1032";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1033:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1033";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1034:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1034";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1035:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1035";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1036:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1036";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1100:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1100";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1101:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1101";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1102:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1102";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1103:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1103";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1150:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1150";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1151:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1151";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1200:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1200";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1201:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1201";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx9-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx10-1-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx10-3-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx11-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx12-generic";
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Consumes the string 'consume_' from the starting of the given input
|
||||
// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa--
|
||||
// input will become gfx908.
|
||||
|
||||
@@ -20,7 +20,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
|
||||
#include "hip_fatbin.hpp"
|
||||
#include "hip_global.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
#include "hip_code_object.hpp"
|
||||
@@ -28,7 +32,6 @@ THE SOFTWARE.
|
||||
#include "comgrctx.hpp"
|
||||
|
||||
namespace hip {
|
||||
|
||||
namespace comgr_helper {
|
||||
|
||||
template <typename comgr_T> class ComgrUniqueHandle {
|
||||
@@ -154,30 +157,312 @@ void ListAllDeviceWithNoCOFromBundle(
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
|
||||
bool &containGenericTarget) {
|
||||
amd_comgr_data_t data_object {0};
|
||||
amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
static std::string TargetGenericMap(const std::string& input) {
|
||||
const static std::unordered_map<std::string, std::string> target_map{
|
||||
// clang-format off
|
||||
{"amdgcn-amd-amdhsa--gfx900" , "amdgcn-amd-amdhsa--gfx9-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx902" , "amdgcn-amd-amdhsa--gfx9-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx904" , "amdgcn-amd-amdhsa--gfx9-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx906" , "amdgcn-amd-amdhsa--gfx9-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx909" , "amdgcn-amd-amdhsa--gfx9-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx90c" , "amdgcn-amd-amdhsa--gfx9-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx942" , "amdgcn-amd-amdhsa--gfx9-4-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx950" , "amdgcn-amd-amdhsa--gfx9-4-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1010", "amdgcn-amd-amdhsa--gfx10-1-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1011", "amdgcn-amd-amdhsa--gfx10-1-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1012", "amdgcn-amd-amdhsa--gfx10-1-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1013", "amdgcn-amd-amdhsa--gfx10-1-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1030", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1031", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1032", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1033", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1034", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1035", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1036", "amdgcn-amd-amdhsa--gfx10-3-generic"},
|
||||
{"amdgcn-amd-amdhsa--gfx1100", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1101", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1102", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1103", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1150", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1151", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1152", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1153", "amdgcn-amd-amdhsa--gfx11-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1200", "amdgcn-amd-amdhsa--gfx12-generic" },
|
||||
{"amdgcn-amd-amdhsa--gfx1201", "amdgcn-amd-amdhsa--gfx12-generic" },
|
||||
// clang-format on
|
||||
};
|
||||
if (auto i = target_map.find(input); i != target_map.end()) {
|
||||
return i->second;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
// If image was passed as a pointer to our hipMod* api, we can try to extract the file name
|
||||
// if it was mapped by the app. Otherwise use the COMGR data API.
|
||||
if (fname_.size() == 0) {
|
||||
if (image_ == nullptr) {
|
||||
LogError("Both Filename and image cannot be null");
|
||||
return hipErrorInvalidValue;
|
||||
// For sramecc and xnack
|
||||
static std::string TargetFeatureCheck(const std::string& input, std::string feature) {
|
||||
if (input.find(feature) != std::string::npos) {
|
||||
auto feature_p = feature + "+"; // feature present eg: xnack+
|
||||
auto feature_m = feature + "-"; // feature absent eg: xnack-
|
||||
if (input.find(feature_p) != std::string::npos) {
|
||||
return feature_p;
|
||||
} else if (input.find(feature_m) != std::string::npos) {
|
||||
return feature_m;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static std::string TargetToGeneric(std::string input) {
|
||||
auto sramecc = TargetFeatureCheck(input, "sramecc");
|
||||
auto xnack = TargetFeatureCheck(input, "xnack");
|
||||
|
||||
// Remove all features
|
||||
size_t index = input.find_first_of(":");
|
||||
std::string name_without_feature = input.substr(0, index);
|
||||
|
||||
// Look up generic name
|
||||
auto generic_name = TargetGenericMap(name_without_feature);
|
||||
if (generic_name.empty()) {
|
||||
return generic_name; // No generic exists
|
||||
}
|
||||
|
||||
// reappend feature
|
||||
if (!sramecc.empty()) {
|
||||
generic_name += ":";
|
||||
generic_name += sramecc;
|
||||
}
|
||||
if (!xnack.empty()) {
|
||||
generic_name += ":";
|
||||
generic_name += xnack;
|
||||
}
|
||||
return generic_name;
|
||||
}
|
||||
|
||||
static bool IsCodeObjectUncompressed(const void* image) {
|
||||
return std::memcmp(image,
|
||||
reinterpret_cast<const void*>(symbols::kOffloadBundleUncompressedMagicStr),
|
||||
sizeof(symbols::kOffloadBundleUncompressedMagicStr) - 1) == 0;
|
||||
}
|
||||
|
||||
static bool IsCodeObjectCompressed(const void* image) {
|
||||
return std::memcmp(image,
|
||||
reinterpret_cast<const void*>(symbols::kOffloadBundleCompressedMagicStr),
|
||||
sizeof(symbols::kOffloadBundleCompressedMagicStr) - 1) == 0;
|
||||
}
|
||||
|
||||
static bool IsCodeObjectElf(const void* image) {
|
||||
const amd::Elf64_Ehdr* ehdr = reinterpret_cast<const amd::Elf64_Ehdr*>(image);
|
||||
return ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == llvm::ELF::ELFOSABI_AMDGPU_HSA;
|
||||
}
|
||||
|
||||
static bool UncompressAndPopulateCodeObject(
|
||||
const void* image, const std::set<std::string>& unique_isa_names,
|
||||
std::map<std::string, std::pair<const void*, size_t>>& code_obj_map) {
|
||||
auto remove_file_extension = [](const std::string& input) -> std::string {
|
||||
size_t index = input.find_last_of(".");
|
||||
std::string ret = input.substr(0, index);
|
||||
return ret;
|
||||
};
|
||||
|
||||
std::vector<std::string> bundle_ids_str;
|
||||
std::set<std::string> unique_ids;
|
||||
|
||||
for (const auto& isa_name : unique_isa_names) {
|
||||
bundle_ids_str.push_back(std::string(symbols::kOffloadKindHipv4_) + isa_name);
|
||||
}
|
||||
|
||||
std::vector<const char*> bundle_ids;
|
||||
bundle_ids.reserve(bundle_ids_str.size());
|
||||
for (auto& bundle_id_str : bundle_ids_str) {
|
||||
bundle_ids.push_back(bundle_id_str.c_str());
|
||||
}
|
||||
|
||||
const auto obheader =
|
||||
reinterpret_cast<const symbols::ClangOffloadBundleCompressedHeader*>(image);
|
||||
const size_t size = obheader->totalSize;
|
||||
|
||||
bool passed = false;
|
||||
do {
|
||||
comgr_helper::ComgrDataSetUniqueHandle bundled_co, unbundled_co;
|
||||
comgr_helper::ComgrDataUniqueHandle input_bundle;
|
||||
if (auto comgr_status = bundled_co.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in creating bundled_co");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = unbundled_co.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in creating unbundled_co");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = input_bundle.Create(AMD_COMGR_DATA_KIND_OBJ_BUNDLE);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in creating input bundle");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::set_data(input_bundle.get(), size, static_cast<const char*>(image));
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in setting image data to bundle");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = amd::Comgr::set_data_name(input_bundle.get(), symbols::kHipFatBinName);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in setting image data to bundle");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = amd::Comgr::data_set_add(bundled_co.get(), input_bundle.get());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in adding data set");
|
||||
break;
|
||||
}
|
||||
|
||||
comgr_helper::ComgrActionInfoUniqueHandle unbundle_action;
|
||||
if (auto comgr_status = unbundle_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in creating unbundle action");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = amd::Comgr::action_info_set_bundle_entry_ids(
|
||||
unbundle_action.get(), bundle_ids.data(), bundle_ids.size());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Error in setting bundle entry ids");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, unbundle_action.get(),
|
||||
bundled_co.get(), unbundled_co.get());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to unbundle code object");
|
||||
break;
|
||||
}
|
||||
|
||||
size_t count = 0;
|
||||
if (auto comgr_status = amd::Comgr::action_data_count(unbundled_co.get(),
|
||||
AMD_COMGR_DATA_KIND_EXECUTABLE, &count);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data count of unbundled code object");
|
||||
break;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
amd_comgr_data_t item;
|
||||
if (auto comgr_status = amd::Comgr::action_data_get_data(
|
||||
unbundled_co.get(), AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data unbundled code object");
|
||||
break;
|
||||
}
|
||||
|
||||
size_t item_name_size = 0;
|
||||
if (auto comgr_status = amd::Comgr::get_data_name(item, &item_name_size, nullptr);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data size");
|
||||
break;
|
||||
}
|
||||
|
||||
std::string item_bundle_id(item_name_size, 0);
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::get_data_name(item, &item_name_size, item_bundle_id.data());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data");
|
||||
break;
|
||||
}
|
||||
|
||||
size_t item_size = 0;
|
||||
if (auto comgr_status = amd::Comgr::get_data(item, &item_size, nullptr);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data size");
|
||||
break;
|
||||
}
|
||||
|
||||
if (item_size > 0) {
|
||||
char* item_data = new char[item_size];
|
||||
if (auto comgr_status = amd::Comgr::get_data(item, &item_size, item_data);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data");
|
||||
break;
|
||||
}
|
||||
|
||||
std::string bundle_entry = remove_file_extension(
|
||||
std::string(item_bundle_id.c_str() + sizeof(symbols::kOffloadHipV4FatBinName_) - 1));
|
||||
LogPrintfInfo("Inserting bundle entry of %s : size: %d, data: %p", bundle_entry.c_str(),
|
||||
item_size, item_data);
|
||||
code_obj_map[bundle_entry] = std::make_pair(item_data, item_size);
|
||||
}
|
||||
}
|
||||
passed = true;
|
||||
} while (0);
|
||||
|
||||
return passed;
|
||||
}
|
||||
|
||||
static bool PopulateCodeObjectMap(
|
||||
const void* image, const std::set<std::string>& unique_isa_names,
|
||||
std::map<std::string, std::pair<const void*, size_t>>& code_obj_map) {
|
||||
bool passed = false;
|
||||
do {
|
||||
comgr_helper::ComgrDataUniqueHandle data_object;
|
||||
if (auto comgr_status = data_object.Create(AMD_COMGR_DATA_KIND_FATBIN);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Creating data object failed with status %d ", comgr_status);
|
||||
break;
|
||||
}
|
||||
|
||||
// There is no way to find size of offload bundle, so we pass 4096 here.
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::set_data(data_object.get(), 4096, reinterpret_cast<const char*>(image));
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
break;
|
||||
}
|
||||
|
||||
// Create a query list using COMGR info for unique ISAs.
|
||||
std::vector<amd_comgr_code_object_info_t> query_list_array;
|
||||
query_list_array.reserve(unique_isa_names.size());
|
||||
for (const auto& isa_name : unique_isa_names) {
|
||||
auto& item = query_list_array.emplace_back();
|
||||
item.isa = isa_name.c_str();
|
||||
item.size = 0;
|
||||
item.offset = 0;
|
||||
}
|
||||
|
||||
// Look up the code object info passing the query list.
|
||||
if (auto comgr_status = amd::Comgr::lookup_code_object(
|
||||
data_object.get(), query_list_array.data(), unique_isa_names.size());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
break;
|
||||
}
|
||||
|
||||
for (const auto& item : query_list_array) {
|
||||
if (item.size > 0) {
|
||||
char* d = new char[item.size];
|
||||
std::memcpy(reinterpret_cast<void*>(d), reinterpret_cast<const char*>(image) + item.offset,
|
||||
item.size);
|
||||
code_obj_map[item.isa] = std::make_pair(d, item.size);
|
||||
}
|
||||
}
|
||||
|
||||
passed = true;
|
||||
} while (0);
|
||||
return passed;
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices) {
|
||||
if (fname_.empty() && image_ == nullptr) {
|
||||
LogError("Both Filename and image cannot be null");
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
if (image_ != nullptr) {
|
||||
if (!amd::Os::FindFileNameFromAddress(image_, &fname_, &foffset_)) {
|
||||
fname_ = std::string("");
|
||||
foffset_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// If file name & path are available (or it is passed to you), then get the file desc to use
|
||||
// COMGR file slice APIs.
|
||||
if (image_ == nullptr && fname_.size() > 0) {
|
||||
// Get File Handle & size of the file.
|
||||
} else {
|
||||
ufd_ = PlatformState::instance().GetUniqueFileHandle(fname_.c_str());
|
||||
if (ufd_ == nullptr) {
|
||||
return hipErrorFileNotFound;
|
||||
@@ -197,264 +482,183 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
|
||||
image_mapped_ = true;
|
||||
}
|
||||
|
||||
// At this line, image should be a valid ptr.
|
||||
guarantee(image_ != nullptr, "Image cannot be nullptr, file:%s did not map for some reason",
|
||||
fname_.c_str());
|
||||
|
||||
bool is_compressed = IsCodeObjectCompressed(image_),
|
||||
is_uncompressed = IsCodeObjectUncompressed(image_);
|
||||
|
||||
// It better be elf if its neither compressed nor uncompressed
|
||||
if (!is_compressed && !is_uncompressed) {
|
||||
if (IsCodeObjectElf(image_)) {
|
||||
// Load the binary directly
|
||||
auto elf_size = amd::Elf::getElfSize(image_);
|
||||
for (size_t i = 0; i < devices.size(); i++) {
|
||||
if (hipSuccess != AddDevProgram(devices[i], image_, elf_size, 0))
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
return hipSuccess; // We are done since it was already ELF
|
||||
} else {
|
||||
LogError("The code object has invalid header: compressed, uncompressed or elf");
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
}
|
||||
|
||||
// Create a list of all targets, which the current device can run
|
||||
// For example, gfx1030 can run gfx1030, gfx10-geneeric, amdgcnspirv
|
||||
std::set<std::string> unique_isa_names;
|
||||
const std::string spirv_isa_name{"spirv64-amd-amdhsa--amdgcnspirv"};
|
||||
unique_isa_names.insert(spirv_isa_name); // Insert SPIRV ISA name
|
||||
for (auto device : devices) {
|
||||
std::string device_name = device->devices()[0]->isa().isaName();
|
||||
unique_isa_names.insert(device_name);
|
||||
auto generic_name = TargetToGeneric(device_name);
|
||||
LogPrintfInfo("Looking up generic name of : %s - %s", device_name.c_str(),
|
||||
generic_name.c_str());
|
||||
if (!generic_name.empty()) {
|
||||
unique_isa_names.insert(generic_name);
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, std::pair<const void*, size_t>> code_obj_map; //!< code object map
|
||||
if (is_compressed) {
|
||||
if (!UncompressAndPopulateCodeObject(image_, unique_isa_names, code_obj_map)) {
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
} else { // uncompressed code object
|
||||
if (!PopulateCodeObjectMap(image_, unique_isa_names, code_obj_map)) {
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hip_status = hipErrorInvalidImage;
|
||||
do {
|
||||
bool isCompressed = false;
|
||||
// If the image ptr is not clang offload bundle then just directly point the image.
|
||||
if (!CodeObject::IsClangOffloadMagicBundle(image_, isCompressed)) {
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
uint64_t elf_size = CodeObject::ElfSize(image_);
|
||||
if (elf_size == 0) {
|
||||
hip_status = hipErrorInvalidImage;
|
||||
break;
|
||||
}
|
||||
hip_status = AddDevProgram(devices[dev_idx], image_, elf_size, 0);
|
||||
bool spirv_isa_found = code_obj_map.find(spirv_isa_name) != code_obj_map.end();
|
||||
for (auto device : devices) {
|
||||
std::string device_name = device->devices()[0]->isa().isaName();
|
||||
auto generic_target_name = TargetToGeneric(device_name); // Generic Code Object
|
||||
auto native_co = code_obj_map.find(device_name); // Native Code Object
|
||||
auto generic_co = code_obj_map.find(generic_target_name); // generic Code Object
|
||||
LogPrintfInfo("Device name: %s Generic name: %s", device_name.c_str(),
|
||||
generic_target_name.c_str());
|
||||
|
||||
// If the size is not 0, that means we found the native isa code object
|
||||
if (native_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) {
|
||||
LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId());
|
||||
|
||||
// We need to do this because there is existing mechanism which deletes code object in
|
||||
// destructor. Ideally next set of refactor should sort it.
|
||||
char* co = new char[native_co->second.second];
|
||||
std::memcpy(co, reinterpret_cast<const char*>(native_co->second.first),
|
||||
native_co->second.second);
|
||||
hip_status = AddDevProgram(device, co, native_co->second.second, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!isCompressed) {
|
||||
if (CodeObject::containGenericTarget(image_)) {
|
||||
LogInfo("offload bundle contains generic target code object");
|
||||
containGenericTarget = true;
|
||||
}
|
||||
}
|
||||
if (isCompressed || containGenericTarget) {
|
||||
size_t major = 0, minor = 0;
|
||||
amd::Comgr::get_version(&major, &minor);
|
||||
if ((major == 2 && minor >= 8) || major > 2) {
|
||||
hip_status = ExtractFatBinaryUsingCOMGR(image_, devices);
|
||||
break;
|
||||
} else if (isCompressed) {
|
||||
LogPrintfError("comgr %zu.%zu cannot support compressed mode which requires comgr 2.8+",
|
||||
major, minor);
|
||||
hip_status = hipErrorNotSupported;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Create a data object, if it fails return error
|
||||
if ((comgr_status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)) !=
|
||||
AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Creating data object failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ufd_ != nullptr && amd::Os::isValidFileDesc(ufd_->fdesc_)) {
|
||||
LogPrintfError("Have valid file!%d", ufd_->fdesc_);
|
||||
}
|
||||
|
||||
if (image_ != nullptr) {
|
||||
// Using the image ptr, map the data object.
|
||||
if ((comgr_status =
|
||||
amd::Comgr::set_data(data_object, 4096, reinterpret_cast<const char*>(image_))) !=
|
||||
AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
guarantee(false, "Cannot have both fname_ and image_ as nullptr");
|
||||
}
|
||||
|
||||
// Find the unique number of ISAs needed for this COMGR query.
|
||||
std::unordered_map<std::string, std::pair<size_t, size_t>> unique_isa_names;
|
||||
for (auto device : devices) {
|
||||
std::string device_name = device->devices()[0]->isa().isaName();
|
||||
unique_isa_names.insert({device_name, std::make_pair<size_t, size_t>(0, 0)});
|
||||
}
|
||||
|
||||
// there are two spirv targets, spirv64-amd-amdhsa--amdgcnspirv and
|
||||
// spirv64-amd-amdhsa-unknown-amdgcnspirv.
|
||||
// eventually we will remove spirv64-amd-amdhsa--amdgcnspirv
|
||||
const std::vector<std::string> spirv_isa_names = {"spirv64-amd-amdhsa--amdgcnspirv",
|
||||
"spirv64-amd-amdhsa-unknown-amdgcnspirv"};
|
||||
for (const auto& spirv_isa_name : spirv_isa_names) {
|
||||
unique_isa_names.insert({spirv_isa_name, std::make_pair<size_t, size_t>(0, 0)});
|
||||
}
|
||||
|
||||
// Create a query list using COMGR info for unique ISAs.
|
||||
std::vector<amd_comgr_code_object_info_t> query_list_array;
|
||||
query_list_array.reserve(unique_isa_names.size());
|
||||
for (const auto& isa_name : unique_isa_names) {
|
||||
auto& item = query_list_array.emplace_back();
|
||||
item.isa = isa_name.first.c_str();
|
||||
item.size = 0;
|
||||
item.offset = 0;
|
||||
}
|
||||
|
||||
// Look up the code object info passing the query list.
|
||||
if ((comgr_status = amd::Comgr::lookup_code_object(data_object, query_list_array.data(),
|
||||
unique_isa_names.size())) !=
|
||||
AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Setting data from file slice failed with status %d ", comgr_status);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
for (const auto& item : query_list_array) {
|
||||
auto unique_it = unique_isa_names.find(item.isa);
|
||||
guarantee(unique_isa_names.cend() != unique_it, "Cannot find unique isa ");
|
||||
unique_it->second = std::pair<size_t, size_t>(static_cast<size_t>(item.size),
|
||||
static_cast<size_t>(item.offset));
|
||||
}
|
||||
|
||||
bool spirv_isa_found = false;
|
||||
decltype(unique_isa_names.begin()) spirv_isa_handle;
|
||||
for (const auto& spirv_isa_name : spirv_isa_names) {
|
||||
auto iter = unique_isa_names.find(spirv_isa_name);
|
||||
if (iter->second.first != 0) {
|
||||
spirv_isa_found = true;
|
||||
spirv_isa_handle = iter;
|
||||
}
|
||||
}
|
||||
bool get_spirv_data_res = false;
|
||||
std::once_flag get_spirv_data_flag;
|
||||
std::unordered_map<std::string, std::pair<char*, size_t>> compiled_co; // code object cache
|
||||
comgr_helper::ComgrDataSetUniqueHandle spirv_data_set;
|
||||
comgr_helper::ComgrDataUniqueHandle spirv_data;
|
||||
|
||||
auto get_spirv_data = [&]() {
|
||||
if (comgr_status = spirv_data_set.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create SPIRV Data set");
|
||||
return;
|
||||
}
|
||||
|
||||
if (comgr_status = spirv_data.Create(AMD_COMGR_DATA_KIND_SPIRV);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create SPIRV Data");
|
||||
return;
|
||||
}
|
||||
|
||||
if (comgr_status =
|
||||
amd::Comgr::set_data(spirv_data.get(), spirv_isa_handle->second.first /* size */,
|
||||
reinterpret_cast<char*>(const_cast<void*>(image_)) +
|
||||
spirv_isa_handle->second.second /* buffer */);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to assign data in comgr");
|
||||
return;
|
||||
}
|
||||
|
||||
if (comgr_status = amd::Comgr::set_data_name(spirv_data.get(), "hip_code_object.spv");
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set data name");
|
||||
return;
|
||||
}
|
||||
|
||||
if (comgr_status = amd::Comgr::data_set_add(spirv_data_set.get(), spirv_data.get());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to add spir data");
|
||||
return;
|
||||
}
|
||||
|
||||
get_spirv_data_res = true;
|
||||
};
|
||||
|
||||
LogPrintfInfo("Searching for code objects, HIP_FORCE_SPIRV_CODEOBJECT: %d",
|
||||
HIP_FORCE_SPIRV_CODEOBJECT);
|
||||
|
||||
for (auto device : devices) {
|
||||
std::string device_name = device->devices()[0]->isa().isaName();
|
||||
auto dev_it = unique_isa_names.find(device_name);
|
||||
// If the size is not 0, that means we found the native isa code object
|
||||
if (dev_it->second.first != 0 && !HIP_FORCE_SPIRV_CODEOBJECT) {
|
||||
LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId());
|
||||
guarantee(unique_isa_names.cend() != dev_it,
|
||||
"Cannot find the device name in the unique device name");
|
||||
hip_status = AddDevProgram(
|
||||
device, reinterpret_cast<address>(const_cast<void*>(image_)) + dev_it->second.second,
|
||||
dev_it->second.first, dev_it->second.second);
|
||||
} else if (generic_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) {
|
||||
LogPrintfInfo("Using Generic code object: %s : %s", device->devices()[0]->isa().targetId(),
|
||||
generic_target_name.c_str());
|
||||
char* co = new char[generic_co->second.second];
|
||||
std::memcpy(co, reinterpret_cast<const char*>(generic_co->second.first),
|
||||
generic_co->second.second);
|
||||
hip_status = AddDevProgram(device, co, generic_co->second.second, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
} else if (spirv_isa_found) {
|
||||
std::call_once(get_spirv_data_flag, get_spirv_data);
|
||||
|
||||
if(!get_spirv_data_res) {
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
std::string target_id = device->devices()[0]->isa().targetId();
|
||||
if (auto code_iter = compiled_co.find(target_id); code_iter != compiled_co.end()) {
|
||||
// We have already compiled for it, lets reuse the code object
|
||||
char* co = new char[code_iter->second.second];
|
||||
std::memcpy(co, code_iter->second.first, code_iter->second.second);
|
||||
LogPrintfInfo("reusing code object for: %s", target_id.c_str());
|
||||
hip_status = AddDevProgram(device, co, code_iter->second.second, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
LogPrintfInfo("Creating ISA for: %s from spirv", target_id.c_str());
|
||||
comgr_helper::ComgrActionInfoUniqueHandle reloc_action;
|
||||
std::string isa = "amdgcn-amd-amdhsa--" + target_id;
|
||||
if (comgr_status = reloc_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create action");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status = amd::Comgr::action_info_set_isa_name(reloc_action.get(), isa.c_str());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set ISA name");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status = amd::Comgr::action_info_set_device_lib_linking(reloc_action.get(), true);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set device lib linking");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status = amd::Comgr::action_info_set_option_list(
|
||||
reloc_action.get(), nullptr /* options list */, 0 /* options size */);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set option list");
|
||||
break;
|
||||
}
|
||||
LogPrintfInfo("Creating ISA for: %s from spirv", target_id.c_str());
|
||||
|
||||
comgr_helper::ComgrDataSetUniqueHandle spirv_data_set;
|
||||
comgr_helper::ComgrDataSetUniqueHandle reloc_data;
|
||||
if (comgr_status = reloc_data.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create reloc data set");
|
||||
comgr_helper::ComgrDataUniqueHandle spirv_data;
|
||||
comgr_helper::ComgrActionInfoUniqueHandle reloc_action;
|
||||
|
||||
if (auto comgr_status = spirv_data_set.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create SPIRV Data set");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status =
|
||||
if (auto comgr_status = spirv_data.Create(AMD_COMGR_DATA_KIND_SPIRV);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create SPIRV Data");
|
||||
break;
|
||||
}
|
||||
|
||||
auto spirv_isa_handle = code_obj_map.find(spirv_isa_name);
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::set_data(spirv_data.get(), spirv_isa_handle->second.second /* size */,
|
||||
reinterpret_cast<const char*>(spirv_isa_handle->second.first)
|
||||
/* buffer */);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to assign SPIRV data");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = amd::Comgr::set_data_name(spirv_data.get(), "hip_code_object.spv");
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set spirv data's name");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = amd::Comgr::data_set_add(spirv_data_set.get(), spirv_data.get());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to add spir data to data set");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = reloc_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create reloc action");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::action_info_set_isa_name(reloc_action.get(), isa.c_str());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set reloc action's isa name");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status = reloc_data.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create reloc data");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::action_info_set_device_lib_linking(reloc_action.get(), true);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set device lib linking for reloc action");
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::do_action(AMD_COMGR_ACTION_COMPILE_SPIRV_TO_RELOCATABLE,
|
||||
reloc_action.get(), spirv_data_set.get(), reloc_data.get());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to compile to reloc");
|
||||
LogError("Failed to do action: codegen bc ot reloc");
|
||||
LogError("Failed to compile spirv to reloc");
|
||||
break;
|
||||
}
|
||||
|
||||
comgr_helper::ComgrActionInfoUniqueHandle exe_action;
|
||||
comgr_helper::ComgrDataSetUniqueHandle exe_output;
|
||||
|
||||
if (comgr_status = exe_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create action");
|
||||
if (auto comgr_status = exe_action.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create exe action");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status = amd::Comgr::action_info_set_isa_name(exe_action.get(), isa.c_str());
|
||||
if (auto comgr_status = amd::Comgr::action_info_set_isa_name(exe_action.get(), isa.c_str());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to set exe action isa name");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status = exe_output.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
if (auto comgr_status = exe_output.Create(); comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to create exe output");
|
||||
break;
|
||||
}
|
||||
|
||||
if (comgr_status =
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
|
||||
exe_action.get(), reloc_data.get(), exe_output.get());
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
@@ -463,74 +667,51 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
}
|
||||
|
||||
amd_comgr_data_t exe_data_handle;
|
||||
if (comgr_status = amd::Comgr::action_data_get_data(
|
||||
if (auto comgr_status = amd::Comgr::action_data_get_data(
|
||||
exe_output.get(), AMD_COMGR_DATA_KIND_EXECUTABLE, 0, &exe_data_handle);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to action get exe data");
|
||||
LogError("Failed to get exe data");
|
||||
break;
|
||||
}
|
||||
|
||||
// Move ownership of exe_data_handle to exe_data
|
||||
comgr_helper::ComgrDataUniqueHandle exe_data(exe_data_handle);
|
||||
|
||||
size_t co_size;
|
||||
if (comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, NULL);
|
||||
size_t co_size = 0;
|
||||
if (auto comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, NULL);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get exe size");
|
||||
break;
|
||||
}
|
||||
|
||||
char* co = new char[co_size];
|
||||
if (comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, co);
|
||||
if (auto comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, co);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get exe data");
|
||||
break;
|
||||
}
|
||||
|
||||
auto elf_size = CodeObject::ElfSize(co);
|
||||
hip_status = AddDevProgram(device, co, elf_size, 0);
|
||||
hip_status = AddDevProgram(device, co, co_size, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
// Save the compiled code object
|
||||
compiled_co[target_id] = std::make_pair(co, elf_size);
|
||||
} else {
|
||||
// We found neither a compatible code object nor SPIRV
|
||||
LogPrintfError(
|
||||
"No compatible code objects found for: %s, value of HIP_FORCE_SPIRV_CODEOBJECT: %d",
|
||||
device->devices()[0]->isa().targetId(), HIP_FORCE_SPIRV_CODEOBJECT);
|
||||
hip_status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (0);
|
||||
|
||||
if (comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("comgr API call failed");
|
||||
hip_status = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
// Clean up file and memory resouces if hip_status failed for some reason.
|
||||
if (hip_status != hipSuccess && hip_status != hipErrorInvalidKernelFile) {
|
||||
ReleaseImageAndFile();
|
||||
}
|
||||
|
||||
if (data_object.handle) {
|
||||
if ((comgr_status = amd::Comgr::release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
// release code objects
|
||||
for (const auto& co : code_obj_map) {
|
||||
delete[] reinterpret_cast<const char*>(co.second.first);
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devices) {
|
||||
amd::ScopedLock lock(FatBinaryLock());
|
||||
|
||||
bool containGenericTarget = false;
|
||||
return ExtractFatBinaryUsingCOMGR(devices, containGenericTarget);
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image,
|
||||
size_t binary_size, size_t binary_offset) {
|
||||
int devID = device->deviceId();
|
||||
@@ -569,55 +750,4 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) {
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
|
||||
const std::vector<hip::Device*>& devices) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// At this line, image should be a valid ptr.
|
||||
guarantee(data != nullptr, "Image cannot be nullptr");
|
||||
|
||||
do {
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
// Copy device names
|
||||
std::vector<std::string> device_names;
|
||||
device_names.reserve(devices.size());
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
|
||||
}
|
||||
|
||||
hip_status =
|
||||
CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0, device_names, code_objs);
|
||||
if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) {
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
if (code_objs[dev_idx].first) {
|
||||
hip_status =
|
||||
AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
return hip_status;
|
||||
}
|
||||
} else {
|
||||
// This is the case of hipErrorNoBinaryForGpu which will finally fail app on device
|
||||
// without code object
|
||||
LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(),
|
||||
device_names[dev_idx].c_str());
|
||||
}
|
||||
}
|
||||
} else if (hip_status == hipErrorInvalidKernelFile) {
|
||||
hip_status = hipSuccess;
|
||||
// If the image ptr is not clang offload bundle then just directly point the image.
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
hip_status = AddDevProgram(devices[dev_idx], data, CodeObject::ElfSize(data), 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
return hip_status;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LogPrintfError("CodeObject::extractCodeObjectFromFatBinaryUsingComgr failed with status %d\n",
|
||||
hip_status);
|
||||
}
|
||||
} while (0);
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
} // namespace hip
|
||||
|
||||
@@ -39,26 +39,7 @@ public:
|
||||
FatBinaryInfo(const char* fname, const void* image);
|
||||
~FatBinaryInfo();
|
||||
|
||||
// Loads Fat binary from file or image, unbundles COs for devices.
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
|
||||
bool &containGenericTarget);
|
||||
|
||||
/**
|
||||
* @brief Extract code object from fatbin using comgr unbundling action via calling
|
||||
* CodeObject::extractCodeObjectFromFatBinaryUsingComgr
|
||||
*
|
||||
* @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed,
|
||||
* compressed and even SPIR-V(to be supported later) mode.
|
||||
* @param[in] devices devices whose code objects will be extracted.
|
||||
* Returned error code
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorNoBinaryForGpu, #hipErrorInvalidValue
|
||||
*
|
||||
* @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr()
|
||||
*/
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const void* data,
|
||||
const std::vector<hip::Device*>& devices);
|
||||
hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices);
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
|
||||
hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size,
|
||||
size_t binary_offset);
|
||||
hipError_t BuildProgram(const int device_id);
|
||||
|
||||
Referencia en una nueva incidencia
Block a user