diff --git a/projects/clr/hipamd/src/CMakeLists.txt b/projects/clr/hipamd/src/CMakeLists.txt
index 0f7c537ba4..e4d0202aaa 100644
--- a/projects/clr/hipamd/src/CMakeLists.txt
+++ b/projects/clr/hipamd/src/CMakeLists.txt
@@ -180,6 +180,19 @@ if(DISABLE_DIRECT_DISPATCH)
   target_compile_definitions(amdhip64 PRIVATE DISABLE_DIRECT_DISPATCH)
 endif()
 
+# Optional rocm-kpack support for kpack split artifacts (default: OFF).
+# When ON, rocm-kpack is a required package: amdhip64 is compiled with
+# ROCM_KPACK_ENABLED=1 and linked against rocm::rocm_kpack so that the CLR can
+# load ROCm multi-arch archives just as it would normal fat binaries. The option
+# is expected to become permanently enabled at a future point. See the WIP repo:
+# https://github.com/ROCm/rocm-kpack (to be migrated to rocm-systems when ready).
+option(ROCM_KPACK_ENABLED "Enable kpack runtime loading for split device code" OFF)
+if(ROCM_KPACK_ENABLED)
+  find_package(rocm-kpack REQUIRED)
+  target_compile_definitions(amdhip64 PRIVATE ROCM_KPACK_ENABLED=1)
+  target_link_libraries(amdhip64 PRIVATE rocm::rocm_kpack)
+endif()
+
 # Short-Term solution for pre-compiled headers for online compilation
 # Enable pre compiled header
 if(__HIP_ENABLE_PCH)
diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp
index 800254b933..18603f768d 100644
--- a/projects/clr/hipamd/src/hip_code_object.cpp
+++ b/projects/clr/hipamd/src/hip_code_object.cpp
@@ -32,6 +32,7 @@ THE SOFTWARE.
#include #include "comgrctx.hpp" #include "hip_comgr_helper.hpp" +#include "hip_platform.hpp" namespace hip { hipError_t ihipFree(void* ptr); @@ -265,6 +266,39 @@ hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) { return hipSuccess; } + // Fat binary wrapper structure (matches hip_platform.cpp definition) + // Defined locally to keep kpack integration as implementation detail + struct __CudaFatBinaryWrapper { + unsigned int magic; + unsigned int version; + void* binary; + void* dummy1; // reserved1: bundle index for multi-TU binaries + }; + + // Check if this is a kpack'd binary (HIPK magic) + const auto* wrapper = reinterpret_cast(data); + if (wrapper->magic == symbols::kHipkMagic && wrapper->version == 1) { + // Discover binary path from the wrapper address using existing CLR utility + std::string binary_path; + size_t file_offset = 0; + if (!amd::Os::FindFileNameFromAddress(data, &binary_path, &file_offset)) { + LogError("Failed to discover binary path for kpack loading"); + return hipErrorNoBinaryForGpu; + } + + // Get bundle index from wrapper->dummy1 (reserved1 field) + // For multi-TU binaries, this identifies which bundle this wrapper corresponds to + uint64_t bundle_index = reinterpret_cast(wrapper->dummy1); + + // wrapper->binary points to msgpack metadata + // ExtractKpackBinary will error if ROCM_KPACK_ENABLED=OFF + FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo( + FatBinaryInfo::KpackParams{wrapper->binary, std::move(binary_path), bundle_index}); + hipError_t err = fatBinaryInfo->ExtractKpackBinary(g_devices); + programs = fatBinaryInfo; + return err; + } + // Create a new fat binary object and extract the fat binary for all devices. 
FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo(nullptr, data); hipError_t err = fatBinaryInfo->ExtractFatBinaryUsingCOMGR(g_devices); @@ -287,6 +321,26 @@ FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized, bool& s return &modules_[data]; } +FatBinaryInfo** StatCO::addKpackBinary(const void* hipk_metadata, const void* wrapper_addr, + bool initialized, bool& success) { + amd::ScopedLock lock(sclock_); + + // Use wrapper_addr as the key (same as data pointer for normal path) + // This allows digestFatBinary to access the wrapper and detect HIPK magic + module_to_hostModule_.insert(std::make_pair(&modules_[wrapper_addr], wrapper_addr)); + + if (!initialized) { + // Deferred loading: modules_[wrapper_addr] is nullptr, digestFatBinary will handle it later + success = true; + return &modules_[wrapper_addr]; + } + + // Immediate loading: call digestFatBinary which handles kpack detection + hipError_t err = digestFatBinary(wrapper_addr, modules_[wrapper_addr]); + success = (err == hipSuccess); + return &modules_[wrapper_addr]; +} + hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) { amd::ScopedLock lock(sclock_); diff --git a/projects/clr/hipamd/src/hip_code_object.hpp b/projects/clr/hipamd/src/hip_code_object.hpp index 20664686dd..f5feefad46 100644 --- a/projects/clr/hipamd/src/hip_code_object.hpp +++ b/projects/clr/hipamd/src/hip_code_object.hpp @@ -55,6 +55,10 @@ constexpr char kHipFatBinName_[] = "hipfatbin-"; constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-"; +// Fat binary wrapper magic values +constexpr uint32_t kHipfMagic = 0x48495046; // "HIPF" little-endian (normal fat binary) +constexpr uint32_t kHipkMagic = 0x4B504948; // "HIPK" little-endian (kpack'd binary) + // Clang Offload bundler description & Header in uncompressed mode. 
struct ClangOffloadBundleInfo { uint64_t offset; @@ -154,6 +158,8 @@ class StatCO : public CodeObject { // Add/Remove/Digest Fat Binaries passed to us from "__hipRegisterFatBinary" FatBinaryInfo** addFatBinary(const void* data, bool initialized, bool& success); + FatBinaryInfo** addKpackBinary(const void* hipk_metadata, const void* wrapper_addr, + bool initialized, bool& success); hipError_t removeFatBinary(FatBinaryInfo** module); hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs); void RemoveAllFatBinaries(); diff --git a/projects/clr/hipamd/src/hip_fatbin.cpp b/projects/clr/hipamd/src/hip_fatbin.cpp index 8fb5c11ea6..996e975936 100644 --- a/projects/clr/hipamd/src/hip_fatbin.cpp +++ b/projects/clr/hipamd/src/hip_fatbin.cpp @@ -24,18 +24,38 @@ THE SOFTWARE. #include "hip_fatbin.hpp" #include "hip_global.hpp" #include +#include #include "hip_code_object.hpp" #include "hip_platform.hpp" #include "comgrctx.hpp" #include "amd_hsa_elf.hpp" #include "hip_comgr_helper.hpp" +#if ROCM_KPACK_ENABLED +#include +#endif + namespace hip { // Use ComgrUniqueHandle and type aliases from hip_comgr_helper.hpp using comgr_helper::ComgrDataSetUniqueHandle; using comgr_helper::ComgrActionInfoUniqueHandle; using comgr_helper::ComgrDataUniqueHandle; +#if ROCM_KPACK_ENABLED +namespace { +// HIP process-global kpack cache - initialized on first use +std::once_flag g_hipKpackCacheInitFlag; +kpack_cache_t g_hipKpackCache = nullptr; + +void initHipKpackCache() { kpack_cache_create(&g_hipKpackCache); } + +kpack_cache_t getHipKpackCache() { + std::call_once(g_hipKpackCacheInitFlag, initHipKpackCache); + return g_hipKpackCache; +} +} // namespace +#endif + FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : foffset_(0), image_(image), image_mapped_(false), uri_(std::string()) { if (fname != nullptr) { @@ -47,6 +67,11 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) dev_programs_.resize(g_devices.size(), nullptr); } 
+FatBinaryInfo::FatBinaryInfo(KpackParams kpack_params)
+    : FatBinaryInfo(kpack_params.binary_path.c_str(), nullptr) {  // delegate with the path first
+  kpack_params_ = std::move(kpack_params);  // then keep the full parameter set
+}
+
 FatBinaryInfo::~FatBinaryInfo() {
   // Release per device fat bin info.
   for (int dev_id = 0; dev_id < dev_programs_.size(); dev_id++) {
@@ -57,7 +82,16 @@ FatBinaryInfo::~FatBinaryInfo() {
   }
   // Release Code object allocations
   for (const auto& i : code_obj_allocations_) {
-    delete[] reinterpret_cast<const char*>(i);
+    if (kpack_params_.has_value()) {
+      // Kpack-allocated code objects must be freed via kpack API
+#if ROCM_KPACK_ENABLED
+      kpack_free_code_object(const_cast<void*>(i));
+#else
+      guarantee(false, "Kpack code object but ROCM_KPACK_ENABLED=OFF");
+#endif
+    } else {
+      delete[] reinterpret_cast<const char*>(i);
+    }
   }
   ReleaseImageAndFile();
 }
@@ -643,3 +677,90 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
+// Loads the code object for a kpack'd (HIPK) binary and registers it with every
+// device in `devices`.
+//
+// Returns hipErrorNotSupported when built without ROCM_KPACK_ENABLED,
+// hipErrorInvalidValue when the kpack parameters or metadata are missing,
+// hipErrorInvalidImage when the cache is unavailable or the load fails, the
+// AddDevProgram error if a device rejects the code object, else hipSuccess.
+hipError_t FatBinaryInfo::ExtractKpackBinary(const std::vector<hip::Device*>& devices) {
+#if !ROCM_KPACK_ENABLED
+  LogError("Kpack binary detected but ROCM_KPACK_ENABLED=OFF");
+  return hipErrorNotSupported;
+#else
+  if (!kpack_params_.has_value()) {
+    LogError("ExtractKpackBinary called but kpack_params_ not set");
+    return hipErrorInvalidValue;
+  }
+
+  const auto& params = kpack_params_.value();
+  if (params.metadata == nullptr) {
+    LogError("HIPK metadata is null");
+    return hipErrorInvalidValue;
+  }
+
+  // A null cache handle means cache creation did not succeed; fail early and clearly.
+  kpack_cache_t cache = getHipKpackCache();
+  if (cache == nullptr) {
+    LogError("kpack cache is not available");
+    return hipErrorInvalidImage;
+  }
+
+  // Build the architecture priority list: for each device the native ISA name
+  // comes first, followed by its generic fallback when one exists.
+  std::vector<std::string> arch_list;
+  arch_list.reserve(devices.size() * 2);
+  for (auto device : devices) {
+    std::string device_name = device->devices()[0]->isa().isaName();
+    arch_list.push_back(device_name);
+
+    // Add generic fallback
+    auto generic_name = TargetToGeneric(device_name);
+    if (!generic_name.empty()) {
+      arch_list.push_back(generic_name);
+    }
+  }
+
+  // The kpack API takes a C-style array of C strings. The pointers stay valid
+  // because arch_list is not modified after this point.
+  std::vector<const char*> arch_ptrs;
+  arch_ptrs.reserve(arch_list.size());
+  for (const auto& arch : arch_list) {
+    arch_ptrs.push_back(arch.c_str());
+  }
+
+  // Load code object from kpack archive
+  void* code_object = nullptr;
+  size_t code_object_size = 0;
+
+  // binary_path is used to resolve relative paths to kpack archives.
+  // bundle_index identifies which code object to load for multi-TU binaries.
+  // The kernel_name (used for TOC lookup) is embedded in the HIPK metadata.
+  // NOTE(review): the index cast target is not visible in SOURCE; uint32_t is assumed.
+  kpack_error_t err = kpack_load_code_object(
+      cache, params.metadata, fname_.c_str(), static_cast<uint32_t>(params.bundle_index),
+      arch_ptrs.data(), arch_ptrs.size(), &code_object, &code_object_size);
+
+  if (err != KPACK_SUCCESS) {
+    LogPrintfError("kpack_load_code_object failed with error: %d", static_cast<int>(err));
+    return hipErrorInvalidImage;
+  }
+
+  // Record ownership before handing the buffer to any device. If a later device
+  // fails, earlier devices may already reference the buffer, so it must not be
+  // freed here; the destructor frees it after releasing the per-device programs.
+  code_obj_allocations_.insert(code_object);
+
+  // Add code object to all devices
+  for (auto device : devices) {
+    hipError_t hip_err = AddDevProgram(device, code_object, code_object_size, 0);
+    if (hip_err != hipSuccess) {
+      return hip_err;
+    }
+  }
+
+  return hipSuccess;
+#endif
+}
+
 hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image,
                                         size_t binary_size, size_t binary_offset) {
   int devID = device->deviceId();
diff --git a/projects/clr/hipamd/src/hip_fatbin.hpp b/projects/clr/hipamd/src/hip_fatbin.hpp
index f3e7322d94..6623660572 100644
--- a/projects/clr/hipamd/src/hip_fatbin.hpp
+++ b/projects/clr/hipamd/src/hip_fatbin.hpp
@@ -28,6 +28,8 @@ THE SOFTWARE.
#include "hip_internal.hpp" #include "platform/program.hpp" +#include + // Forward declaration for Unique FD struct UniqueFD; @@ -36,10 +38,20 @@ namespace hip { // Fat Binary Info class FatBinaryInfo { public: + // Parameters for kpack'd (split device code) binaries + struct KpackParams { + const void* metadata; //!< Msgpack metadata from .rocm_kpack_ref section + std::string binary_path; //!< Path to the host binary + uint64_t bundle_index; //!< Bundle index for multi-TU binaries (0-based) + }; + FatBinaryInfo(const char* fname, const void* image); + // Constructor for kpack'd (split device code) binaries + explicit FatBinaryInfo(KpackParams kpack_params); ~FatBinaryInfo(); hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices); + hipError_t ExtractKpackBinary(const std::vector& devices); hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size, size_t binary_offset); hipError_t BuildProgram(const int device_id); @@ -84,6 +96,9 @@ class FatBinaryInfo { // Only used for FBs where image is directly passed std::string uri_; //!< Uniform resource indicator + // Kpack parameters for split device code binaries (nullopt for normal fat binaries) + std::optional kpack_params_; + std::vector dev_programs_; //!< Program info per Device std::shared_ptr ufd_; //!< Unique file descriptor diff --git a/projects/clr/hipamd/src/hip_platform.cpp b/projects/clr/hipamd/src/hip_platform.cpp index 4b16f7f605..e9419eb512 100644 --- a/projects/clr/hipamd/src/hip_platform.cpp +++ b/projects/clr/hipamd/src/hip_platform.cpp @@ -73,6 +73,17 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const c void** __hipRegisterFatBinary(const void* data) { const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + + // Check for HIPK magic (kpack'd binary with external device code) + if (fbwrapper->magic == symbols::kHipkMagic && fbwrapper->version == 1) { + // For HIPK binaries, fbwrapper->binary points to msgpack 
metadata + // Route through addKpackBinary which will error if ROCM_KPACK_ENABLED=OFF + bool success = false; + auto fat_binary_info = PlatformState::instance().addKpackBinary(fbwrapper->binary, data, success); + return success ? reinterpret_cast(fat_binary_info) : nullptr; + } + + // Normal HIPF path if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic, fbwrapper->version); @@ -1003,6 +1014,11 @@ hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data, bool& success return statCO_.addFatBinary(data, initialized_, success); } +hip::FatBinaryInfo** PlatformState::addKpackBinary(const void* hipk_metadata, + const void* wrapper_addr, bool& success) { + return statCO_.addKpackBinary(hipk_metadata, wrapper_addr, initialized_, success); +} + hipError_t PlatformState::removeFatBinary(hip::FatBinaryInfo** module) { return statCO_.removeFatBinary(module); } diff --git a/projects/clr/hipamd/src/hip_platform.hpp b/projects/clr/hipamd/src/hip_platform.hpp index ba8261f3d2..5148792ae0 100644 --- a/projects/clr/hipamd/src/hip_platform.hpp +++ b/projects/clr/hipamd/src/hip_platform.hpp @@ -87,6 +87,8 @@ class PlatformState { // Static Code Objects functions hip::FatBinaryInfo** addFatBinary(const void* data, bool& success); + hip::FatBinaryInfo** addKpackBinary(const void* hipk_metadata, const void* wrapper_addr, + bool& success); hipError_t removeFatBinary(hip::FatBinaryInfo** module); hipError_t digestFatBinary(const void* data, hip::FatBinaryInfo*& programs);