rocr: support reserving non-registered VA
Extend hsa_amd_vmem_address_reserve/hsa_amd_vmem_address_reserve_align
to support the HSA_AMD_VMEM_ADDRESS_NO_REGISTER flag. With this flag, the
call only reserves a virtual address range without registering it with the
underlying driver; the range can later be used by
hsa_amd_svm_attributes_set for SVM-based memory allocations.
[ROCm/ROCR-Runtime commit: b3c48cc68c]
Этот коммит содержится в:
коммит произвёл
Yat Sin, David
родитель
14b5faf333
Коммит
39bddd8b9d
@@ -87,7 +87,7 @@ include(utils)
|
||||
|
||||
|
||||
## Get version strings
|
||||
get_version("1.17.0")
|
||||
get_version("1.18.0")
|
||||
if (${ROCM_PATCH_VERSION})
|
||||
set(VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
endif()
|
||||
|
||||
@@ -802,11 +802,14 @@ class Runtime {
|
||||
typedef void* ThunkHandle;
|
||||
|
||||
struct AddressHandle {
|
||||
AddressHandle() : size(0), use_count(0) {}
|
||||
AddressHandle(size_t size) : size(size), use_count(0) {}
|
||||
AddressHandle() : os_addr(nullptr), size(0), use_count(0), registered(false) {}
|
||||
AddressHandle(void* addr, size_t _size, bool _registered) : os_addr(addr), size(_size), use_count(0), registered(_registered) {}
|
||||
|
||||
// Address returned by OS. May be different from user address when adjusted for alignment
|
||||
void *os_addr;
|
||||
size_t size;
|
||||
int use_count;
|
||||
bool registered;
|
||||
};
|
||||
std::map<const void*, AddressHandle> reserved_address_map_; // Indexed by VA
|
||||
|
||||
|
||||
@@ -1343,7 +1343,10 @@ hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t addre
|
||||
TRY;
|
||||
IS_OPEN();
|
||||
IS_ZERO(size);
|
||||
IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());
|
||||
|
||||
if (!(flags & HSA_AMD_VMEM_ADDRESS_NO_REGISTER))
|
||||
IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());
|
||||
|
||||
return core::Runtime::runtime_singleton_->VMemoryAddressReserve(va, size, address, 0, flags);
|
||||
CATCH;
|
||||
}
|
||||
|
||||
@@ -3144,6 +3144,26 @@ hsa_status_t Runtime::VMemoryAddressReserve(void** va, size_t size, uint64_t add
|
||||
|
||||
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
|
||||
|
||||
if (flags & HSA_AMD_VMEM_ADDRESS_NO_REGISTER) {
|
||||
size_t requested = size + alignment - sysconf(_SC_PAGE_SIZE);
|
||||
auto mem = mmap(addr, requested, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
|
||||
if (mem == MAP_FAILED)
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
|
||||
auto aligned = AlignUp(mem, alignment);
|
||||
|
||||
// Hint the kernel to use transparent huge pages (THP) for large host allocations, which can improve performance
|
||||
constexpr size_t kLargePageSize = 2*1024*1024;
|
||||
if (size >= kLargePageSize) {
|
||||
if (madvise(aligned, size, MADV_HUGEPAGE))
|
||||
debug_warning(false && "madvise with MADV_HUGEPAGE failed");
|
||||
}
|
||||
|
||||
reserved_address_map_[aligned] = AddressHandle(mem, size, false);
|
||||
*va = aligned;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
memFlags.ui32.OnlyAddress = 1;
|
||||
memFlags.ui32.FixedAddress = 1;
|
||||
|
||||
@@ -3155,7 +3175,7 @@ hsa_status_t Runtime::VMemoryAddressReserve(void** va, size_t size, uint64_t add
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
reserved_address_map_[addr] = AddressHandle(size);
|
||||
reserved_address_map_[addr] = AddressHandle(addr, size, true);
|
||||
*va = addr;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -3173,7 +3193,10 @@ hsa_status_t Runtime::VMemoryAddressFree(void* va, size_t size) {
|
||||
|
||||
if (it->second.use_count > 0) return HSA_STATUS_ERROR_RESOURCE_FREE;
|
||||
|
||||
if (HSAKMT_CALL(hsaKmtFreeMemory(va, size)) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;
|
||||
if (it->second.registered)
|
||||
if (HSAKMT_CALL(hsaKmtFreeMemory(it->second.os_addr, size)) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;
|
||||
else
|
||||
if (munmap(it->second.os_addr, size)) return HSA_STATUS_ERROR;
|
||||
|
||||
reserved_address_map_.erase(it);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
|
||||
@@ -60,9 +60,10 @@
|
||||
* - 1.7 - hsa_amd_signal_wait_all
|
||||
* - 1.8 - hsa_amd_memory_get_preferred_copy_engine
|
||||
* - 1.9 - hsa_amd_portable_export_dmabuf_v2
|
||||
* - 1.10 - hsa_amd_vmem_address_reserve: HSA_AMD_VMEM_ADDRESS_NO_REGISTER
|
||||
*/
|
||||
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 9
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 10
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -3264,6 +3265,11 @@ hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
|
||||
*/
|
||||
hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
|
||||
|
||||
typedef enum hsa_amd_vmem_address_reserve_flag_s {
|
||||
// Only reserve a VA range without registering it with the underlying driver
|
||||
HSA_AMD_VMEM_ADDRESS_NO_REGISTER = (1UL << 0),
|
||||
} hsa_amd_vmem_address_reserve_flag_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a reserved address range
|
||||
*
|
||||
@@ -3275,7 +3281,7 @@ hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
|
||||
* @param[out] va virtual address allocated
|
||||
* @param[in] size of address range requested
|
||||
* @param[in] address requested
|
||||
* @param[in] flags currently unsupported
|
||||
* @param[in] flags optional hsa_amd_vmem_address_reserve_flag_t
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
|
||||
*
|
||||
@@ -3303,7 +3309,7 @@ hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t addre
|
||||
* @param[in] size of address range requested
|
||||
* @param[in] address requested
|
||||
* @param[in] alignment requested. 0 for default. Must be >= page-size and a power of 2
|
||||
* @param[in] flags currently unsupported
|
||||
* @param[in] flags optional hsa_amd_vmem_address_reserve_flag_t
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
|
||||
*
|
||||
|
||||
Ссылка в новой задаче
Block a user