From bf28bbd9ab71b986cbb18f19fdad0d5066ec5a4b Mon Sep 17 00:00:00 2001 From: "Godavarthy Surya, Anusha" Date: Mon, 21 Apr 2025 13:43:25 +0530 Subject: [PATCH] SWDEV-508538 - Optimize mem access and pack structure (#71) Change-Id: Ib05b8891a6d228fc3266918a000d332fddc7438b --- hipamd/src/hip_fatbin.cpp | 159 +++++++++++++++--------------------- hipamd/src/hip_fatbin.hpp | 39 ++------- rocclr/platform/program.cpp | 12 ++- rocclr/platform/program.hpp | 8 +- 4 files changed, 88 insertions(+), 130 deletions(-) diff --git a/hipamd/src/hip_fatbin.cpp b/hipamd/src/hip_fatbin.cpp index 09d1393b01..6611580bab 100644 --- a/hipamd/src/hip_fatbin.cpp +++ b/hipamd/src/hip_fatbin.cpp @@ -93,14 +93,6 @@ typedef ComgrUniqueHandle ComgrDataUniqueHandle; } // namespace comgr_helper -FatBinaryDeviceInfo::~FatBinaryDeviceInfo() { - if (program_ != nullptr) { - program_->unload(); - program_->release(); - program_ = nullptr; - } -} - FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : fdesc_(amd::Os::FDescInit()), fsize_(0), @@ -114,19 +106,22 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) fname_ = std::string(); } - fatbin_dev_info_.resize(g_devices.size(), nullptr); + dev_programs_.resize(g_devices.size(), nullptr); } FatBinaryInfo::~FatBinaryInfo() { // Different devices in the same model have the same binary_image_ std::set toDelete; // Release per device fat bin info. - for (auto* fbd : fatbin_dev_info_) { - if (fbd != nullptr) { - if (fbd->binary_image_ && fbd->binary_offset_ == 0 && fbd->binary_image_ != image_) { - toDelete.insert(fbd->binary_image_); + for (int dev_id = 0; dev_id < dev_programs_.size(); dev_id++) { + if (dev_programs_[dev_id] != nullptr) { + auto& binaryInfo = dev_programs_[dev_id]->binary(*g_devices[dev_id]->devices()[0]); + if (std::get<0>(binaryInfo) && std::get<1>(binaryInfo).second == 0 && + std::get<0>(binaryInfo) != image_) { + toDelete.insert(std::get<0>(binaryInfo)); } - delete fbd; + dev_programs_[dev_id]->release(); + dev_programs_[dev_id] = nullptr; } } for (auto itemData : toDelete) { @@ -242,12 +237,8 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectordeviceId()] = - new FatBinaryDeviceInfo(image_, elf_size, 0); - fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = - new amd::Program(*devices[dev_idx]->asContext()); - if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) { - hip_status = hipErrorOutOfMemory; + hip_status = AddDevProgram(devices[dev_idx], image_, elf_size, 0); + if (hip_status != hipSuccess) { break; } } @@ -435,10 +426,12 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectordevices()[0]->isa().targetId()); guarantee(unique_isa_names.cend() != dev_it, "Cannot find the device name in the unique device name"); - fatbin_dev_info_[device->deviceId()] = new FatBinaryDeviceInfo( - reinterpret_cast
(const_cast(image_)) + dev_it->second.second, + hip_status = AddDevProgram( + device, reinterpret_cast
(const_cast(image_)) + dev_it->second.second, dev_it->second.first, dev_it->second.second); - fatbin_dev_info_[device->deviceId()]->program_ = new amd::Program(*(device->asContext())); + if (hip_status != hipSuccess) { + break; + } } else if (spirv_isa_found) { // Compile to bitcode once std::call_once(spirv_to_bc_flag, compile_spv_bitcode); @@ -454,9 +447,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectorsecond.second]; std::memcpy(co, code_iter->second.first, code_iter->second.second); LogPrintfInfo("reusing code object for: %s", target_id.c_str()); - fatbin_dev_info_[device->deviceId()] = - new FatBinaryDeviceInfo(co, code_iter->second.second, 0); - fatbin_dev_info_[device->deviceId()]->program_ = new amd::Program(*(device->asContext())); + hip_status = AddDevProgram(device, co, code_iter->second.second, 0); + if (hip_status != hipSuccess) { + break; + } continue; } @@ -554,9 +548,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectordeviceId()] = new FatBinaryDeviceInfo(co, elf_size, 0); - fatbin_dev_info_[device->deviceId()]->program_ = new amd::Program(*(device->asContext())); - + hip_status = AddDevProgram(device, co, elf_size, 0); + if (hip_status != hipSuccess) { + break; + } // Save the compiled code object compiled_co[target_id] = std::make_pair(co, elf_size); } else { @@ -657,13 +652,9 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devi // Calculate the offset wrt binary_image and the original image size_t offset_l = (reinterpret_cast
(const_cast(code_objs[dev_idx].first)) - reinterpret_cast
(const_cast(image_))); - - fatbin_dev_info_[devices[dev_idx]->deviceId()] = - new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l); - - fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = - new amd::Program(*devices[dev_idx]->asContext()); - if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { + hip_error = AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, + code_objs[dev_idx].second, offset_l); + if (hip_error != hipSuccess) { break; } } @@ -671,54 +662,45 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devi return hip_error; } + const void* binary_image; + size_t binary_size; + size_t binary_offset; if (hip_error == hipErrorInvalidKernelFile) { for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - // the image type is no CLANG_OFFLOAD_BUNDLER, image for current device directly passed - fatbin_dev_info_[devices[dev_idx]->deviceId()] = - new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0); + hip_error = AddDevProgram(devices[dev_idx], image_, CodeObject::ElfSize(image_), 0); + if (hip_error != hipSuccess) { + return hip_error; + } } } else if (hip_error == hipSuccess) { for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { // Calculate the offset wrt binary_image and the original image - size_t offset_l = (reinterpret_cast
(const_cast(code_objs[dev_idx].first)) - + binary_offset = (reinterpret_cast
(const_cast(code_objs[dev_idx].first)) - reinterpret_cast
(const_cast(image_))); - - fatbin_dev_info_[devices[dev_idx]->deviceId()] = - new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l); + hip_error = AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, + code_objs[dev_idx].second, binary_offset); + if (hip_error != hipSuccess) { + return hip_error; + } } } - - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = - new amd::Program(*devices[dev_idx]->asContext()); - if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { - return hipErrorOutOfMemory; - } - } - return hipSuccess; } -hipError_t FatBinaryInfo::AddDevProgram(const int device_id) { - // Device Id bounds Check - DeviceIdCheck(device_id); - - FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; - if (fbd_info == nullptr) { - return hipErrorInvalidKernelFile; +hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image, + size_t binary_size, size_t binary_offset) { + int devID = device->deviceId(); + amd::Context* ctx = device->asContext(); + amd::Program* program = new amd::Program(*ctx); + dev_programs_[devID] = program; + if (program == nullptr) { + return hipErrorOutOfMemory; } - - // If fat binary was already added, skip this step and return success - if (fbd_info->add_dev_prog_ == false) { - amd::Context* ctx = g_devices[device_id]->asContext(); - if (CL_SUCCESS != - fbd_info->program_->addDeviceProgram(*ctx->devices()[0], fbd_info->binary_image_, - fbd_info->binary_size_, false, nullptr, nullptr, - fdesc_, fbd_info->binary_offset_, uri_)) { - return hipErrorInvalidKernelFile; - } - fbd_info->add_dev_prog_ = true; + if (CL_SUCCESS != + program->addDeviceProgram(*ctx->devices()[0], binary_image, binary_size, false, nullptr, + nullptr, fdesc_, binary_offset, uri_)) { + return hipErrorInvalidKernelFile; } return hipSuccess; } @@ -726,21 +708,17 @@ hipError_t FatBinaryInfo::AddDevProgram(const int device_id) { hipError_t FatBinaryInfo::BuildProgram(const int device_id) { // Device Id Check and Add DeviceProgram if not added so far DeviceIdCheck(device_id); - IHIP_RETURN_ONFAIL(AddDevProgram(device_id)); // If Program was already built skip this step and return success - FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; - if (fbd_info->prog_built_ == false) { + if (dev_programs_[device_id]->IsProgramBuilt(*g_devices[device_id]->devices()[0]) == false) { if (CL_SUCCESS != - fbd_info->program_->build(g_devices[device_id]->devices(), nullptr, nullptr, nullptr, - kOptionChangeable, kNewDevProg)) { + dev_programs_[device_id]->build(g_devices[device_id]->devices(), nullptr, nullptr, nullptr, + kOptionChangeable, kNewDevProg)) { + return hipErrorNoBinaryForGpu; + } + if (!dev_programs_[device_id]->load()) { return hipErrorNoBinaryForGpu; } - fbd_info->prog_built_ = true; - } - - if (!fbd_info->program_->load()) { - return hipErrorNoBinaryForGpu; } return hipSuccess; } @@ -766,13 +744,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data, if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) { for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { if (code_objs[dev_idx].first) { - fatbin_dev_info_[devices[dev_idx]->deviceId()] = - new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, 0); - - fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = - new amd::Program(*devices[dev_idx]->asContext()); - if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { - break; + hip_status = + AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, code_objs[dev_idx].second, 0); + if (hip_status != hipSuccess) { + return hip_status; } } else { // This is the case of hipErrorNoBinaryForGpu which will finally fail app on device @@ -785,13 +760,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data, hip_status = hipSuccess; // If the image ptr is not clang offload bundle then just directly point the image. for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - fatbin_dev_info_[devices[dev_idx]->deviceId()] = - new FatBinaryDeviceInfo(data, CodeObject::ElfSize(data), 0); - fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = - new amd::Program(*devices[dev_idx]->asContext()); - if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) { - hip_status = hipErrorOutOfMemory; - break; + hip_status = AddDevProgram(devices[dev_idx], data, CodeObject::ElfSize(data), 0); + if (hip_status != hipSuccess) { + return hip_status; } } } else { diff --git a/hipamd/src/hip_fatbin.hpp b/hipamd/src/hip_fatbin.hpp index ea0ab74aea..2335449be0 100644 --- a/hipamd/src/hip_fatbin.hpp +++ b/hipamd/src/hip_fatbin.hpp @@ -33,30 +33,6 @@ struct UniqueFD; namespace hip { -//Fat Binary Per Device info -class FatBinaryDeviceInfo { -public: - FatBinaryDeviceInfo (const void* binary_image, size_t binary_size, size_t binary_offset) - : binary_image_(binary_image), binary_size_(binary_size), - binary_offset_(binary_offset), program_(nullptr), - add_dev_prog_(false), prog_built_(false) {} - - ~FatBinaryDeviceInfo(); - -private: - const void* binary_image_; // binary image ptr - size_t binary_size_; // binary image size - size_t binary_offset_; // image offset from original - - amd::Program* program_; // reinterpreted as hipModule_t - friend class FatBinaryInfo; - - //Control Variables - bool add_dev_prog_; - bool prog_built_; -}; - - // Fat Binary Info class FatBinaryInfo { public: @@ -83,29 +59,31 @@ public: hipError_t ExtractFatBinaryUsingCOMGR(const void* data, const std::vector& devices); hipError_t ExtractFatBinary(const std::vector& devices); - hipError_t AddDevProgram(const int device_id); + hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size, + size_t binary_offset); hipError_t BuildProgram(const int device_id); // Device Id bounds check inline void DeviceIdCheck(const int device_id) const { guarantee(device_id >= 0, "Invalid DeviceId less than 0"); - guarantee(static_cast(device_id) < fatbin_dev_info_.size(), "Invalid DeviceId, greater than no of fatbin device info!"); + guarantee(static_cast(device_id) < dev_programs_.size(), + "Invalid DeviceId, greater than no of device programs!"); } // Getter Methods amd::Program* GetProgram(int device_id) { DeviceIdCheck(device_id); - return fatbin_dev_info_[device_id]->program_; + return dev_programs_[device_id]; } hipModule_t Module(int device_id) const { DeviceIdCheck(device_id); - return reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); + return reinterpret_cast(as_cl(dev_programs_[device_id])); } hipError_t GetModule(int device_id, hipModule_t* hmod) const { DeviceIdCheck(device_id); - *hmod = reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); + *hmod = reinterpret_cast(as_cl(dev_programs_[device_id])); return hipSuccess; } @@ -125,8 +103,7 @@ private: // Only used for FBs where image is directly passed std::string uri_; //!< Uniform resource indicator - // Per Device Info, like corresponding binary ptr, size. - std::vector fatbin_dev_info_; + std::vector dev_programs_; //!< Program info per Device std::shared_ptr ufd_; //!< Unique file descriptor amd::Monitor fb_lock_{true}; //!< Lock for the fat binary access diff --git a/rocclr/platform/program.cpp b/rocclr/platform/program.cpp index 0980f7d894..cfae3161f0 100644 --- a/rocclr/platform/program.cpp +++ b/rocclr/platform/program.cpp @@ -73,6 +73,7 @@ static void remove_g_option(std::string &option) } Program::~Program() { + unload(); // Destroy all device programs for (const auto& it : devicePrograms_) { delete it.second; @@ -196,7 +197,7 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng } // Save the original image - binary_[&rootDev] = std::make_tuple(memory, length, make_copy); + binary_[&rootDev] = std::make_tuple(memory, std::make_pair(length, foffset), make_copy); } const device::Program* same_dev_prog = nullptr; @@ -278,7 +279,8 @@ int32_t Program::compile(const std::vector& devices, size_t numHeaders, device::Program* devProgram = getDeviceProgram(*it); if (devProgram == NULL) { const binary_t& bin = binary(*it); - retval = addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin), false, &parsedOptions); + retval = + addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin).first, false, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } @@ -397,7 +399,8 @@ int32_t Program::link(const std::vector& devices, size_t numInputs, device::Program* devProgram = getDeviceProgram(*it); if (devProgram == NULL) { const binary_t& bin = binary(*it); - retval = addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin), false, &parsedOptions); + retval = + addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin).first, false, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } @@ -531,7 +534,8 @@ int32_t Program::build(const std::vector& devices, const char* options, retval = false; continue; } - retval = addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin), false, &parsedOptions); + retval = + addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin).first, false, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } diff --git a/rocclr/platform/program.hpp b/rocclr/platform/program.hpp index 532e18b3a7..aa2894dfef 100644 --- a/rocclr/platform/program.hpp +++ b/rocclr/platform/program.hpp @@ -80,7 +80,8 @@ class Context; //! A collection of binaries for devices in the associated context. class Program : public RuntimeObject { public: - typedef std::tuple binary_t; + typedef std::tuple, + bool /*allocated*/> binary_t; typedef std::set devicelist_t; typedef std::unordered_map devicebinary_t; typedef std::unordered_map deviceprograms_t; @@ -236,6 +237,11 @@ class Program : public RuntimeObject { //! Actions to perform during program unload void unload(); + + //! Returns the program built status + bool IsProgramBuilt(const Device& device) { + return CL_BUILD_SUCCESS == devicePrograms_[&device]->buildStatus(); + } }; /*! @}