Do not allocate code objects when we map a static code object (#2332)
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
b4e04b07ed
Коммит
fdf73116d5
@@ -48,22 +48,16 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image)
|
||||
}
|
||||
|
||||
FatBinaryInfo::~FatBinaryInfo() {
|
||||
// Different devices in the same model have the same binary_image_
|
||||
std::set<const void*> toDelete;
|
||||
// Release per device fat bin info.
|
||||
for (int dev_id = 0; dev_id < dev_programs_.size(); dev_id++) {
|
||||
if (dev_programs_[dev_id] != nullptr) {
|
||||
auto& binaryInfo = dev_programs_[dev_id]->binary(*g_devices[dev_id]->devices()[0]);
|
||||
if (std::get<0>(binaryInfo) && std::get<1>(binaryInfo).second == 0 &&
|
||||
std::get<0>(binaryInfo) != image_) {
|
||||
toDelete.insert(std::get<0>(binaryInfo));
|
||||
}
|
||||
dev_programs_[dev_id]->release();
|
||||
dev_programs_[dev_id] = nullptr;
|
||||
}
|
||||
}
|
||||
for (auto itemData : toDelete) {
|
||||
delete[] reinterpret_cast<const char*>(itemData);
|
||||
// Release Code object allocations
|
||||
for (const auto& i : code_obj_allocations_) {
|
||||
delete[] reinterpret_cast<const char*>(i);
|
||||
}
|
||||
ReleaseImageAndFile();
|
||||
}
|
||||
@@ -296,7 +290,8 @@ static bool UncompressAndPopulateCodeObject(
|
||||
comgr_helper::ComgrDataUniqueHandle item_handle(item);
|
||||
|
||||
size_t item_name_size = 0;
|
||||
if (auto comgr_status = amd::Comgr::get_data_name(item_handle.get(), &item_name_size, nullptr);
|
||||
if (auto comgr_status =
|
||||
amd::Comgr::get_data_name(item_handle.get(), &item_name_size, nullptr);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get data size");
|
||||
break;
|
||||
@@ -376,10 +371,9 @@ static bool PopulateCodeObjectMap(
|
||||
|
||||
for (const auto& item : query_list_array) {
|
||||
if (item.size > 0) {
|
||||
char* d = new char[item.size];
|
||||
std::memcpy(reinterpret_cast<void*>(d), reinterpret_cast<const char*>(image) + item.offset,
|
||||
item.size);
|
||||
code_obj_map[item.isa] = std::make_pair(d, item.size);
|
||||
// Map the offset pointer and size from the image
|
||||
auto loc = reinterpret_cast<const char*>(image) + item.offset;
|
||||
code_obj_map[item.isa] = std::make_pair(loc, item.size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -462,12 +456,17 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
if (!UncompressAndPopulateCodeObject(image_, unique_isa_names, code_obj_map)) {
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
// For compressed code objects, we use comgr to extract and make a copy.
|
||||
// Track these to release later
|
||||
std::for_each(code_obj_map.begin(), code_obj_map.end(),
|
||||
[&](const auto& info) { code_obj_allocations_.insert(info.second.first); });
|
||||
} else { // uncompressed code object
|
||||
if (!PopulateCodeObjectMap(image_, unique_isa_names, code_obj_map)) {
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
}
|
||||
|
||||
LogPrintfInfo("Forcing SPIRV: %s", (HIP_FORCE_SPIRV_CODEOBJECT != 0 ? "true" : "false"));
|
||||
hipError_t hip_status = hipErrorInvalidImage;
|
||||
do {
|
||||
bool spirv_isa_found = code_obj_map.find(spirv_isa_name) != code_obj_map.end() ||
|
||||
@@ -481,24 +480,21 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
|
||||
// If the size is not 0, that means we found the native isa code object
|
||||
if (native_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) {
|
||||
// We need to do this because there is existing mechanism which deletes code object in
|
||||
// destructor. Ideally next set of refactor should sort it.
|
||||
char* co = new char[native_co->second.second];
|
||||
std::memcpy(co, reinterpret_cast<const char*>(native_co->second.first),
|
||||
native_co->second.second);
|
||||
hip_status = AddDevProgram(device, co, native_co->second.second, 0);
|
||||
LogPrintfInfo("Using native code object for device: %s co: %s", device_name.c_str(),
|
||||
native_co->first.c_str());
|
||||
hip_status = AddDevProgram(device, native_co->second.first, native_co->second.second, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
} else if (generic_co != code_obj_map.end() && !HIP_FORCE_SPIRV_CODEOBJECT) {
|
||||
char* co = new char[generic_co->second.second];
|
||||
std::memcpy(co, reinterpret_cast<const char*>(generic_co->second.first),
|
||||
generic_co->second.second);
|
||||
hip_status = AddDevProgram(device, co, generic_co->second.second, 0);
|
||||
LogPrintfInfo("Using generic code object for device: %s co: %s", device_name.c_str(),
|
||||
generic_co->first.c_str());
|
||||
hip_status = AddDevProgram(device, generic_co->second.first, generic_co->second.second, 0);
|
||||
if (hip_status != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
} else if (spirv_isa_found) {
|
||||
LogPrintfInfo("Using spirv code object for device: %s", device_name.c_str());
|
||||
std::string target_id = device->devices()[0]->isa().targetId();
|
||||
std::string isa = "amdgcn-amd-amdhsa--" + target_id;
|
||||
|
||||
@@ -620,6 +616,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
}
|
||||
|
||||
char* co = new char[co_size];
|
||||
code_obj_allocations_.insert(co); // track to release later
|
||||
if (auto comgr_status = amd::Comgr::get_data(exe_data.get(), &co_size, co);
|
||||
comgr_status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
LogError("Failed to get exe data");
|
||||
@@ -640,11 +637,6 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// release code objects
|
||||
for (const auto& co : code_obj_map) {
|
||||
delete[] reinterpret_cast<const char*>(co.second.first);
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
|
||||
@@ -86,8 +86,9 @@ class FatBinaryInfo {
|
||||
|
||||
std::vector<amd::Program*> dev_programs_; //!< Program info per Device
|
||||
|
||||
std::shared_ptr<UniqueFD> ufd_; //!< Unique file descriptor
|
||||
amd::Monitor fb_lock_{true}; //!< Lock for the fat binary access
|
||||
std::shared_ptr<UniqueFD> ufd_; //!< Unique file descriptor
|
||||
amd::Monitor fb_lock_{true}; //!< Lock for the fat binary access
|
||||
std::unordered_set<const void*> code_obj_allocations_; //!< Track allocations for code objects
|
||||
};
|
||||
|
||||
}; // namespace hip
|
||||
|
||||
Ссылка в новой задаче
Block a user