diff --git a/projects/rocr-runtime/CMakeLists.txt b/projects/rocr-runtime/CMakeLists.txt
index 935e430807..8177c7e54b 100644
--- a/projects/rocr-runtime/CMakeLists.txt
+++ b/projects/rocr-runtime/CMakeLists.txt
@@ -87,7 +87,7 @@
 include(utils)
 
 ## Get version strings
-get_version("1.17.0")
+get_version("1.18.0")
 if (${ROCM_PATCH_VERSION})
     set(VERSION_PATCH ${ROCM_PATCH_VERSION})
 endif()
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h
index 4c3a1cea00..0cbc742685 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h
@@ -802,11 +802,18 @@ class Runtime {
   typedef void* ThunkHandle;
 
   struct AddressHandle {
-    AddressHandle() : size(0), use_count(0) {}
-    AddressHandle(size_t size) : size(size), use_count(0) {}
+    AddressHandle() : os_addr(nullptr), os_size(0), size(0), use_count(0), registered(false) {}
+    AddressHandle(void* addr, size_t _os_size, size_t _size, bool _registered)
+        : os_addr(addr), os_size(_os_size), size(_size), use_count(0), registered(_registered) {}
 
+    // Address returned by OS. May be different from user address when adjusted for alignment
+    void *os_addr;
+    // Length of the range obtained from the OS at os_addr. Larger than size when padded for alignment
+    size_t os_size;
     size_t size;
     int use_count;
+    // True when the range is released with hsaKmtFreeMemory, false when it was mmap'ed (NO_REGISTER)
+    bool registered;
   };
 
   std::map reserved_address_map_;  // Indexed by VA
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
index ecc0ff82b6..e3d2349e7f 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
@@ -1343,7 +1343,10 @@ hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t addre
   TRY;
   IS_OPEN();
   IS_ZERO(size);
-  IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());
+
+  if (!(flags & HSA_AMD_VMEM_ADDRESS_NO_REGISTER))
+    IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());
+
   return core::Runtime::runtime_singleton_->VMemoryAddressReserve(va, size, address, 0, flags);
   CATCH;
 }
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp
index 0bfbe58fb3..84469ba5ea 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp
@@ -3144,6 +3144,28 @@ hsa_status_t Runtime::VMemoryAddressReserve(void** va, size_t size, uint64_t add
 
   ScopedAcquire lock(&memory_lock_);
 
+  if (flags & HSA_AMD_VMEM_ADDRESS_NO_REGISTER) {
+    // Map extra bytes so that the address rounded up to `alignment` still has `size` bytes behind it
+    const size_t requested = size + alignment - sysconf(_SC_PAGE_SIZE);
+    auto mem = mmap(addr, requested, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
+    if (mem == MAP_FAILED)
+      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
+
+    auto aligned = AlignUp(mem, alignment);
+
+    // Hint to enable THP for large host allocations which can help in performance gain
+    constexpr size_t kLargePageSize = 2*1024*1024;
+    if (size >= kLargePageSize) {
+      if (madvise(aligned, size, MADV_HUGEPAGE))
+        debug_warning(false && "madvise with MADV_HUGEPAGE failed");
+    }
+
+    // Record the whole OS mapping (mem, requested) so that VMemoryAddressFree can unmap all of it
+    reserved_address_map_[aligned] = AddressHandle(mem, requested, size, false);
+    *va = aligned;
+    return HSA_STATUS_SUCCESS;
+  }
+
   memFlags.ui32.OnlyAddress = 1;
   memFlags.ui32.FixedAddress = 1;
 
@@ -3155,7 +3177,7 @@ hsa_status_t Runtime::VMemoryAddressReserve(void** va, size_t size, uint64_t add
     return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
   }
 
-  reserved_address_map_[addr] = AddressHandle(size);
+  reserved_address_map_[addr] = AddressHandle(addr, size, size, true);
   *va = addr;
   return HSA_STATUS_SUCCESS;
 }
@@ -3173,7 +3195,14 @@ hsa_status_t Runtime::VMemoryAddressFree(void* va, size_t size) {
 
   if (it->second.use_count > 0) return HSA_STATUS_ERROR_RESOURCE_FREE;
 
-  if (HSAKMT_CALL(hsaKmtFreeMemory(va, size)) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;
+  // Driver-registered ranges are released through the driver, mmap reservations through munmap
+  if (it->second.registered) {
+    if (HSAKMT_CALL(hsaKmtFreeMemory(it->second.os_addr, size)) != HSAKMT_STATUS_SUCCESS)
+      return HSA_STATUS_ERROR;
+  } else {
+    // Unmap the whole OS mapping, including the alignment padding around the user range
+    if (munmap(it->second.os_addr, it->second.os_size)) return HSA_STATUS_ERROR;
+  }
 
   reserved_address_map_.erase(it);
   return HSA_STATUS_SUCCESS;
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h
index 4bd06ab952..9c5cbb2bc1 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h
@@ -60,9 +60,10 @@
  * - 1.7 - hsa_amd_signal_wait_all
  * - 1.8 - hsa_amd_memory_get_preferred_copy_engine
  * - 1.9 - hsa_amd_portable_export_dmabuf_v2
+ * - 1.10 - hsa_amd_vmem_address_reserve: HSA_AMD_VMEM_ADDRESS_NO_REGISTER
  */
 #define HSA_AMD_INTERFACE_VERSION_MAJOR 1
-#define HSA_AMD_INTERFACE_VERSION_MINOR 9
+#define HSA_AMD_INTERFACE_VERSION_MINOR 10
 
 #ifdef __cplusplus
 extern "C" {
@@ -3264,6 +3265,11 @@ hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
  */
 hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
 
+typedef enum hsa_amd_vmem_address_reserve_flag_s {
+  // Only reserve a VA range without registering it to the underlying driver
+  HSA_AMD_VMEM_ADDRESS_NO_REGISTER = (1UL << 0),
+} hsa_amd_vmem_address_reserve_flag_t;
+
 /**
  * @brief Allocate a reserved address range
  *
@@ -3275,7 +3281,7 @@ hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
  * @param[out] va virtual address allocated
  * @param[in] size of address range requested
  * @param[in] address requested
- * @param[in] flags currently unsupported
+ * @param[in] flags optional hsa_amd_vmem_address_reserve_flag_t
  *
  * @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
  *
@@ -3303,7 +3309,7 @@ hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t addre
  * @param[in] size of address range requested
  * @param[in] address requested
  * @param[in] alignment requested. 0 for default. Must be >= page-size and a power of 2
- * @param[in] flags currently unsupported
+ * @param[in] flags optional hsa_amd_vmem_address_reserve_flag_t
  *
  * @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
  *