SWDEV-508538 - Optimize mem access and pack structure (#71)

Change-Id: Ib05b8891a6d228fc3266918a000d332fddc7438b
这个提交包含在:
Godavarthy Surya, Anusha
2025-04-21 13:43:25 +05:30
提交者 GitHub
父节点 99142c3dd9
当前提交 bf28bbd9ab
修改 4 个文件,包含 88 行新增和 130 行删除
+65 -94
查看文件
@@ -93,14 +93,6 @@ typedef ComgrUniqueHandle<amd_comgr_data_t> ComgrDataUniqueHandle;
} // namespace comgr_helper
// Destructor: if a program is attached, calls unload() and release() on it
// and then clears the pointer. Nothing is done when no program is attached.
FatBinaryDeviceInfo::~FatBinaryDeviceInfo() {
  if (program_ == nullptr) {
    return;
  }

  // unload() is issued before release(), mirroring the original ordering.
  program_->unload();
  program_->release();
  program_ = nullptr;
}
FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image)
: fdesc_(amd::Os::FDescInit()),
fsize_(0),
@@ -114,19 +106,22 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image)
fname_ = std::string();
}
fatbin_dev_info_.resize(g_devices.size(), nullptr);
dev_programs_.resize(g_devices.size(), nullptr);
}
FatBinaryInfo::~FatBinaryInfo() {
// Different devices in the same model have the same binary_image_
std::set<const void*> toDelete;
// Release per device fat bin info.
for (auto* fbd : fatbin_dev_info_) {
if (fbd != nullptr) {
if (fbd->binary_image_ && fbd->binary_offset_ == 0 && fbd->binary_image_ != image_) {
toDelete.insert(fbd->binary_image_);
for (int dev_id = 0; dev_id < dev_programs_.size(); dev_id++) {
if (dev_programs_[dev_id] != nullptr) {
auto& binaryInfo = dev_programs_[dev_id]->binary(*g_devices[dev_id]->devices()[0]);
if (std::get<0>(binaryInfo) && std::get<1>(binaryInfo).second == 0 &&
std::get<0>(binaryInfo) != image_) {
toDelete.insert(std::get<0>(binaryInfo));
}
delete fbd;
dev_programs_[dev_id]->release();
dev_programs_[dev_id] = nullptr;
}
}
for (auto itemData : toDelete) {
@@ -242,12 +237,8 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
hip_status = hipErrorInvalidImage;
break;
}
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(image_, elf_size, 0);
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) {
hip_status = hipErrorOutOfMemory;
hip_status = AddDevProgram(devices[dev_idx], image_, elf_size, 0);
if (hip_status != hipSuccess) {
break;
}
}
@@ -435,10 +426,12 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
LogPrintfInfo("Using Native code object: %s", device->devices()[0]->isa().targetId());
guarantee(unique_isa_names.cend() != dev_it,
"Cannot find the device name in the unique device name");
fatbin_dev_info_[device->deviceId()] = new FatBinaryDeviceInfo(
reinterpret_cast<address>(const_cast<void*>(image_)) + dev_it->second.second,
hip_status = AddDevProgram(
device, reinterpret_cast<address>(const_cast<void*>(image_)) + dev_it->second.second,
dev_it->second.first, dev_it->second.second);
fatbin_dev_info_[device->deviceId()]->program_ = new amd::Program(*(device->asContext()));
if (hip_status != hipSuccess) {
break;
}
} else if (spirv_isa_found) {
// Compile to bitcode once
std::call_once(spirv_to_bc_flag, compile_spv_bitcode);
@@ -454,9 +447,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
char* co = new char[code_iter->second.second];
std::memcpy(co, code_iter->second.first, code_iter->second.second);
LogPrintfInfo("reusing code object for: %s", target_id.c_str());
fatbin_dev_info_[device->deviceId()] =
new FatBinaryDeviceInfo(co, code_iter->second.second, 0);
fatbin_dev_info_[device->deviceId()]->program_ = new amd::Program(*(device->asContext()));
hip_status = AddDevProgram(device, co, code_iter->second.second, 0);
if (hip_status != hipSuccess) {
break;
}
continue;
}
@@ -554,9 +548,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
}
auto elf_size = CodeObject::ElfSize(co);
fatbin_dev_info_[device->deviceId()] = new FatBinaryDeviceInfo(co, elf_size, 0);
fatbin_dev_info_[device->deviceId()]->program_ = new amd::Program(*(device->asContext()));
hip_status = AddDevProgram(device, co, elf_size, 0);
if (hip_status != hipSuccess) {
break;
}
// Save the compiled code object
compiled_co[target_id] = std::make_pair(co, elf_size);
} else {
@@ -657,13 +652,9 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devi
// Calculate the offset wrt binary_image and the original image
size_t offset_l = (reinterpret_cast<address>(const_cast<void*>(code_objs[dev_idx].first)) -
reinterpret_cast<address>(const_cast<void*>(image_)));
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l);
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
hip_error = AddDevProgram(devices[dev_idx], code_objs[dev_idx].first,
code_objs[dev_idx].second, offset_l);
if (hip_error != hipSuccess) {
break;
}
}
@@ -671,54 +662,45 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devi
return hip_error;
}
const void* binary_image;
size_t binary_size;
size_t binary_offset;
if (hip_error == hipErrorInvalidKernelFile) {
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
// the image type is no CLANG_OFFLOAD_BUNDLER, image for current device directly passed
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0);
hip_error = AddDevProgram(devices[dev_idx], image_, CodeObject::ElfSize(image_), 0);
if (hip_error != hipSuccess) {
return hip_error;
}
}
} else if (hip_error == hipSuccess) {
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
// Calculate the offset wrt binary_image and the original image
size_t offset_l = (reinterpret_cast<address>(const_cast<void*>(code_objs[dev_idx].first)) -
binary_offset = (reinterpret_cast<address>(const_cast<void*>(code_objs[dev_idx].first)) -
reinterpret_cast<address>(const_cast<void*>(image_)));
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l);
hip_error = AddDevProgram(devices[dev_idx], code_objs[dev_idx].first,
code_objs[dev_idx].second, binary_offset);
if (hip_error != hipSuccess) {
return hip_error;
}
}
}
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
return hipErrorOutOfMemory;
}
}
return hipSuccess;
}
// AddDevProgram: attaches a binary image to the amd::Program used for one device.
// Returns hipErrorInvalidKernelFile when addDeviceProgram() does not report CL_SUCCESS,
// and hipSuccess otherwise.
hipError_t FatBinaryInfo::AddDevProgram(const int device_id) {
// Device Id bounds Check
DeviceIdCheck(device_id);
FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id];
if (fbd_info == nullptr) {
return hipErrorInvalidKernelFile;
// Form taking the device and the image description directly: creates an amd::Program in the
// device's context, stores it in dev_programs_[deviceId()] and adds the image for the first
// device of that context.
//   device        - device the program is created for
//   binary_image  - pointer to the code object passed to addDeviceProgram()
//   binary_size   - size passed to addDeviceProgram()
//   binary_offset - offset passed to addDeviceProgram() together with fdesc_ and uri_
hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image,
size_t binary_size, size_t binary_offset) {
int devID = device->deviceId();
amd::Context* ctx = device->asContext();
amd::Program* program = new amd::Program(*ctx);
// NOTE(review): devID indexes dev_programs_ without a DeviceIdCheck(), and whatever pointer was
// already in the slot is overwritten without release() — confirm deviceId() is always in range
// and that each device is added at most once per FatBinaryInfo.
dev_programs_[devID] = program;
// Reports hipErrorOutOfMemory when no program object was obtained.
if (program == nullptr) {
return hipErrorOutOfMemory;
}
// If fat binary was already added, skip this step and return success
if (fbd_info->add_dev_prog_ == false) {
amd::Context* ctx = g_devices[device_id]->asContext();
if (CL_SUCCESS !=
fbd_info->program_->addDeviceProgram(*ctx->devices()[0], fbd_info->binary_image_,
fbd_info->binary_size_, false, nullptr, nullptr,
fdesc_, fbd_info->binary_offset_, uri_)) {
return hipErrorInvalidKernelFile;
}
fbd_info->add_dev_prog_ = true;
// NOTE(review): when this call fails the freshly created program remains stored in
// dev_programs_[devID] — verify callers do not use that slot after an error return.
if (CL_SUCCESS !=
program->addDeviceProgram(*ctx->devices()[0], binary_image, binary_size, false, nullptr,
nullptr, fdesc_, binary_offset, uri_)) {
return hipErrorInvalidKernelFile;
}
return hipSuccess;
}
@@ -726,21 +708,17 @@ hipError_t FatBinaryInfo::AddDevProgram(const int device_id) {
// Builds (when not already built) and loads the program associated with `device_id`.
// Returns hipErrorNoBinaryForGpu when build() does not report CL_SUCCESS or when load() fails,
// and hipSuccess otherwise.
hipError_t FatBinaryInfo::BuildProgram(const int device_id) {
// Device Id Check and Add DeviceProgram if not added so far
DeviceIdCheck(device_id);
IHIP_RETURN_ONFAIL(AddDevProgram(device_id));
// If Program was already built skip this step and return success
FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id];
if (fbd_info->prog_built_ == false) {
// NOTE(review): dev_programs_ is resized with nullptr entries in the FatBinaryInfo constructor,
// and the slot is dereferenced here without a null check — confirm a program has been added
// for this device before BuildProgram() can be reached.
if (dev_programs_[device_id]->IsProgramBuilt(*g_devices[device_id]->devices()[0]) == false) {
if (CL_SUCCESS !=
fbd_info->program_->build(g_devices[device_id]->devices(), nullptr, nullptr, nullptr,
kOptionChangeable, kNewDevProg)) {
dev_programs_[device_id]->build(g_devices[device_id]->devices(), nullptr, nullptr, nullptr,
kOptionChangeable, kNewDevProg)) {
return hipErrorNoBinaryForGpu;
}
// A failed load() is reported with the same error code as a failed build().
if (!dev_programs_[device_id]->load()) {
return hipErrorNoBinaryForGpu;
}
fbd_info->prog_built_ = true;
}
if (!fbd_info->program_->load()) {
return hipErrorNoBinaryForGpu;
}
return hipSuccess;
}
@@ -766,13 +744,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) {
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
if (code_objs[dev_idx].first) {
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) {
break;
hip_status =
AddDevProgram(devices[dev_idx], code_objs[dev_idx].first, code_objs[dev_idx].second, 0);
if (hip_status != hipSuccess) {
return hip_status;
}
} else {
// This is the case of hipErrorNoBinaryForGpu which will finally fail app on device
@@ -785,13 +760,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data,
hip_status = hipSuccess;
// If the image ptr is not clang offload bundle then just directly point the image.
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
fatbin_dev_info_[devices[dev_idx]->deviceId()] =
new FatBinaryDeviceInfo(data, CodeObject::ElfSize(data), 0);
fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ =
new amd::Program(*devices[dev_idx]->asContext());
if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) {
hip_status = hipErrorOutOfMemory;
break;
hip_status = AddDevProgram(devices[dev_idx], data, CodeObject::ElfSize(data), 0);
if (hip_status != hipSuccess) {
return hip_status;
}
}
} else {
+8 -31
查看文件
@@ -33,30 +33,6 @@ struct UniqueFD;
namespace hip {
// Per-device record of a fat binary: describes the device's binary image and
// holds the amd::Program pointer associated with it.
class FatBinaryDeviceInfo {
  // FatBinaryInfo accesses the private fields below directly.
  friend class FatBinaryInfo;

 public:
  // Records the image description; the program pointer starts out null and
  // both control flags start out false.
  FatBinaryDeviceInfo(const void* binary_image, size_t binary_size, size_t binary_offset)
      : binary_image_(binary_image),
        binary_size_(binary_size),
        binary_offset_(binary_offset) {}

  ~FatBinaryDeviceInfo();

 private:
  const void* binary_image_;         // pointer to the binary image
  size_t binary_size_;               // size of the binary image
  size_t binary_offset_;             // offset of the image from the original
  amd::Program* program_ = nullptr;  // reinterpreted as hipModule_t

  // Control variables
  bool add_dev_prog_ = false;
  bool prog_built_ = false;
};
// Fat Binary Info
class FatBinaryInfo {
public:
@@ -83,29 +59,31 @@ public:
hipError_t ExtractFatBinaryUsingCOMGR(const void* data,
const std::vector<hip::Device*>& devices);
hipError_t ExtractFatBinary(const std::vector<hip::Device*>& devices);
hipError_t AddDevProgram(const int device_id);
hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size,
size_t binary_offset);
hipError_t BuildProgram(const int device_id);
// Device Id bounds check
// Verifies through guarantee() that device_id is non-negative and smaller than the size of the
// per-device vector, so that it can be used as an index by the accessors of this class.
inline void DeviceIdCheck(const int device_id) const {
guarantee(device_id >= 0, "Invalid DeviceId less than 0");
// The cast to size_t is only performed after the non-negative check above.
guarantee(static_cast<size_t>(device_id) < fatbin_dev_info_.size(), "Invalid DeviceId, greater than no of fatbin device info!");
guarantee(static_cast<size_t>(device_id) < dev_programs_.size(),
"Invalid DeviceId, greater than no of device programs!");
}
// Getter Methods
// Returns the amd::Program pointer stored for `device_id` after a bounds check.
// NOTE(review): the per-device vector is resized with nullptr entries in the constructor, so the
// result may be null if nothing was added for this device — confirm callers handle that.
amd::Program* GetProgram(int device_id) {
DeviceIdCheck(device_id);
return fatbin_dev_info_[device_id]->program_;
return dev_programs_[device_id];
}
// Returns the program stored for `device_id`, passed through as_cl() and reinterpreted as a
// hipModule_t. The device id is bounds-checked first.
hipModule_t Module(int device_id) const {
DeviceIdCheck(device_id);
return reinterpret_cast<hipModule_t>(as_cl(fatbin_dev_info_[device_id]->program_));
return reinterpret_cast<hipModule_t>(as_cl(dev_programs_[device_id]));
}
// Writes the module handle for `device_id` to *hmod (same conversion as Module()) and always
// returns hipSuccess. The device id is bounds-checked first.
// NOTE(review): hmod is dereferenced without a null check — callers must pass a valid pointer.
hipError_t GetModule(int device_id, hipModule_t* hmod) const {
DeviceIdCheck(device_id);
*hmod = reinterpret_cast<hipModule_t>(as_cl(fatbin_dev_info_[device_id]->program_));
*hmod = reinterpret_cast<hipModule_t>(as_cl(dev_programs_[device_id]));
return hipSuccess;
}
@@ -125,8 +103,7 @@ private:
// Only used for FBs where image is directly passed
std::string uri_; //!< Uniform resource indicator
// Per Device Info, like corresponding binary ptr, size.
std::vector<FatBinaryDeviceInfo*> fatbin_dev_info_;
std::vector<amd::Program*> dev_programs_; //!< Program info per Device
std::shared_ptr<UniqueFD> ufd_; //!< Unique file descriptor
amd::Monitor fb_lock_{true}; //!< Lock for the fat binary access
+8 -4
查看文件
@@ -73,6 +73,7 @@ static void remove_g_option(std::string &option)
}
Program::~Program() {
unload();
// Destroy all device programs
for (const auto& it : devicePrograms_) {
delete it.second;
@@ -196,7 +197,7 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng
}
// Save the original image
binary_[&rootDev] = std::make_tuple(memory, length, make_copy);
binary_[&rootDev] = std::make_tuple(memory, std::make_pair(length, foffset), make_copy);
}
const device::Program* same_dev_prog = nullptr;
@@ -278,7 +279,8 @@ int32_t Program::compile(const std::vector<Device*>& devices, size_t numHeaders,
device::Program* devProgram = getDeviceProgram(*it);
if (devProgram == NULL) {
const binary_t& bin = binary(*it);
retval = addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin), false, &parsedOptions);
retval =
addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin).first, false, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
@@ -397,7 +399,8 @@ int32_t Program::link(const std::vector<Device*>& devices, size_t numInputs,
device::Program* devProgram = getDeviceProgram(*it);
if (devProgram == NULL) {
const binary_t& bin = binary(*it);
retval = addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin), false, &parsedOptions);
retval =
addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin).first, false, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
@@ -531,7 +534,8 @@ int32_t Program::build(const std::vector<Device*>& devices, const char* options,
retval = false;
continue;
}
retval = addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin), false, &parsedOptions);
retval =
addDeviceProgram(*it, std::get<0>(bin), std::get<1>(bin).first, false, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
+7 -1
查看文件
@@ -80,7 +80,8 @@ class Context;
//! A collection of binaries for devices in the associated context.
class Program : public RuntimeObject {
public:
typedef std::tuple<const uint8_t* /*image*/, size_t /*size*/, bool /*allocated*/> binary_t;
typedef std::tuple<const uint8_t* /*image*/, std::pair<size_t /*size*/, size_t /* file_offset */>,
bool /*allocated*/> binary_t;
typedef std::set<Device const*> devicelist_t;
typedef std::unordered_map<Device const*, binary_t> devicebinary_t;
typedef std::unordered_map<Device const*, device::Program*> deviceprograms_t;
@@ -236,6 +237,11 @@ class Program : public RuntimeObject {
//! Actions to perform during program unload
void unload();
//! Returns true when \a device has a device program whose build status is CL_BUILD_SUCCESS.
//! A device without a registered (non-null) device program is reported as not built.
bool IsProgramBuilt(const Device& device) {
  // Look the device up with find() instead of operator[]: operator[] would insert a
  // null entry for an unknown device and the dereference that followed would be
  // undefined behaviour.
  const auto it = devicePrograms_.find(&device);
  if (it == devicePrograms_.end() || it->second == nullptr) {
    return false;
  }
  return CL_BUILD_SUCCESS == it->second->buildStatus();
}
};
/*! @}