libhsakmt: Add mmap-based aperture management for GFXv9 and later

If the GPU virtual address space is at least 47 bits wide, don't reserve virtual address space at startup; use mmap to allocate virtual addresses instead.

Change-Id: Ic935b03c8e78271829fc8e6cfd0e543184aff818
Signed-off-by: Felix Kuehling <felix.kuehling@gmail.com>
[ROCm/ROCR-Runtime commit: 80f2cc644c]
2018-08-04 21:20:27 -04:00
parent 7cd22e8785
commit 3dfb956bd5
1 changed files with 142 additions and 4 deletions
@@ -139,8 +139,13 @@ static const manageable_aperture_ops_t reserved_aperture_ops = {
 };

 /* Unreserved aperture type using mmap to allocate virtual address space */
+/* Forward declarations: the mmap-based handlers are defined further down
+ * in this file, after the ops table that refers to them.
+ */
+static void *mmap_aperture_allocate_aligned(manageable_aperture_t *aper,
+					    uint64_t size, uint64_t align);
+static void mmap_aperture_release(manageable_aperture_t *aper,
+				  void *addr, uint64_t size);
+/* Positional initializer: the aligned-allocate handler comes first, the
+ * release handler second.
+ */
 static const manageable_aperture_ops_t mmap_aperture_ops = {
-	NULL, NULL /* TODO */
+	mmap_aperture_allocate_aligned,
+	mmap_aperture_release
 };

 struct manageable_aperture {
@@ -673,6 +678,84 @@ static void *reserved_aperture_allocate_aligned(manageable_aperture_t *app,
 	return start;
 }

+/* Allocate a range of virtual address space from an mmap-managed aperture.
+ *
+ * @aper:  aperture to allocate from; must have is_cpu_accessible set
+ * @size:  requested size in bytes; rounded up to a multiple of aper->align
+ * @align: requested alignment; raised to at least aper->align, and further
+ *         in powers of two (below GPU_HUGE_PAGE_SIZE) for large buffers
+ *
+ * The range is obtained with an anonymous, private, PROT_NONE mapping that
+ * is larger than needed; the excess at both ends (alignment padding and
+ * guard space) is unmapped again before returning.
+ *
+ * Returns the aligned start address, or NULL if the aperture is not CPU
+ * accessible, mmap fails, or the resulting range does not lie within
+ * aper->base .. aper->limit.
+ */
+static void *mmap_aperture_allocate_aligned(manageable_aperture_t *aper,
+					    uint64_t size, uint64_t align)
+{
+	uint64_t aligned_padded_size, guard_size;
+	void *addr, *aligned_addr, *aligned_end, *mapping_end;
+
+	if (!aper->is_cpu_accessible) {
+		pr_err("MMap Aperture must be CPU accessible\n");
+		return NULL;
+	}
+
+	/* Never use less than the aperture's own minimum alignment */
+	if (align < aper->align)
+		align = aper->align;
+
+	/* Align big buffers to the next power-of-2 up to huge page
+	 * size for flexible fragment size TLB optimizations
+	 */
+	while (align < GPU_HUGE_PAGE_SIZE && size >= (align << 1))
+		align <<= 1;
+
+	/* Align memory size to match aperture requirements */
+	size = ALIGN_UP(size, aper->align);
+
+	/* Add padding to guarantee proper alignment and leave guard
+	 * pages on both sides. The padding is align - PAGE_SIZE rather
+	 * than a full align; this assumes the address returned by mmap
+	 * is already PAGE_SIZE-aligned.
+	 */
+	guard_size = (uint64_t)aper->guard_pages * PAGE_SIZE;
+	aligned_padded_size = size + align +
+		2*guard_size - PAGE_SIZE;
+
+	/* Map memory: address space only (PROT_NONE, MAP_NORESERVE),
+	 * placed wherever the kernel chooses
+	 */
+	addr = mmap(0, aligned_padded_size, PROT_NONE,
+		    MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+	if (addr == MAP_FAILED) {
+		pr_err("mmap failed: %s\n", strerror(errno));
+		return NULL;
+	}
+
+	/* Adjust for alignment and guard pages, range-check the result.
+	 * On failure the whole mapping is released again.
+	 */
+	aligned_addr = (void *)ALIGN_UP((uint64_t)addr + guard_size, align);
+	if (aligned_addr < aper->base ||
+	    VOID_PTR_ADD(aligned_addr, size - 1) > aper->limit) {
+		pr_err("mmap returned %p, out of range %p-%p\n", aligned_addr,
+		       aper->base, aper->limit);
+		munmap(addr, aligned_padded_size);
+		return NULL;
+	}
+
+	/* Unmap padding and guard pages in front of the aligned range.
+	 * NOTE(review): the guard regions are unmapped, not kept as
+	 * PROT_NONE, so nothing here stops a later mmap from reusing
+	 * them - confirm this is intended.
+	 */
+	if (aligned_addr > addr)
+		munmap(addr, VOID_PTRS_SUB(aligned_addr, addr));
+
+	/* Likewise unmap everything behind the aligned range */
+	aligned_end = VOID_PTR_ADD(aligned_addr, size);
+	mapping_end = VOID_PTR_ADD(addr, aligned_padded_size);
+	if (mapping_end > aligned_end)
+		munmap(aligned_end, VOID_PTRS_SUB(mapping_end, aligned_end));
+
+	return aligned_addr;
+}
+
+/* Release a range of virtual address space in an mmap-managed aperture.
+ *
+ * @aper: aperture the range belongs to; must have is_cpu_accessible set
+ * @addr: start address of the range
+ * @size: size in bytes; rounded up to a multiple of aper->align, the same
+ *        rounding mmap_aperture_allocate_aligned applies to its size
+ *
+ * Resets the NUMA policy of the range to the default and unmaps it. The
+ * return values of mbind and munmap are not checked, so failures of either
+ * call go unreported.
+ */
+static void mmap_aperture_release(manageable_aperture_t *aper,
+				  void *addr, uint64_t size)
+{
+	if (!aper->is_cpu_accessible) {
+		pr_err("MMap Aperture must be CPU accessible\n");
+		return;
+	}
+
+	/* Align memory size to match aperture requirements */
+	size = ALIGN_UP(size, aper->align);
+
+	/* Reset NUMA policy */
+	mbind(addr, size, MPOL_DEFAULT, NULL, 0, 0);
+
+	/* Unmap memory */
+	munmap(addr, size);
+}
+
 /* Wrapper functions to call aperture-specific VA management functions */
 static void *aperture_allocate_area_aligned(manageable_aperture_t *app,
 					    uint64_t MemorySizeInBytes,
@@ -1561,6 +1644,43 @@ static HSAKMT_STATUS acquire_vm(uint32_t gpu_id, int fd)
 	return HSAKMT_STATUS_SUCCESS;
 }

+/* Set up the SVM apertures for mmap-based (unreserved) address management.
+ *
+ * @base:        lowest address of the default SVM aperture
+ * @limit:       highest address of the default SVM aperture
+ * @align:       minimum alignment stored in the aperture
+ * @guard_pages: number of guard pages stored in the aperture
+ *
+ * Configures svm.apertures[SVM_DEFAULT] to use mmap_aperture_ops, clears
+ * base and limit of svm.apertures[SVM_COHERENT], then probes the setup by
+ * allocating and releasing a single page.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS if the probe allocation succeeded, in which
+ * case svm.dgpu_aperture and svm.dgpu_alt_aperture both point at the
+ * default aperture. Returns HSAKMT_STATUS_ERROR otherwise; the aperture
+ * fields written above are not reverted in that case.
+ */
+static HSAKMT_STATUS init_mmap_apertures(HSAuint64 base, HSAuint64 limit,
+					 HSAuint32 align, HSAuint32 guard_pages)
+{
+	void *addr;
+
+	/* Set up one SVM aperture */
+	svm.apertures[SVM_DEFAULT].base  = (void *)base;
+	svm.apertures[SVM_DEFAULT].limit = (void *)limit;
+	svm.apertures[SVM_DEFAULT].align = align;
+	svm.apertures[SVM_DEFAULT].guard_pages = guard_pages;
+	svm.apertures[SVM_DEFAULT].is_cpu_accessible = true;
+	svm.apertures[SVM_DEFAULT].ops = &mmap_aperture_ops;
+
+	/* The coherent aperture gets an empty (NULL) range */
+	svm.apertures[SVM_COHERENT].base = svm.apertures[SVM_COHERENT].limit =
+		NULL;
+
+	/* Try to allocate one page. If it fails, we'll fall back to
+	 * managing our own reserved address range.
+	 */
+	addr = aperture_allocate_area(&svm.apertures[SVM_DEFAULT], PAGE_SIZE);
+	if (addr) {
+		/* The probe page is not needed; give it back right away */
+		aperture_release_area(&svm.apertures[SVM_DEFAULT], addr,
+				      PAGE_SIZE);
+
+		/* Both dGPU aperture pointers share the default aperture */
+		svm.dgpu_aperture = svm.dgpu_alt_aperture =
+			&svm.apertures[SVM_DEFAULT];
+		pr_info("Initialized unreserved SVM apertures: %p - %p\n",
+			svm.apertures[SVM_DEFAULT].base,
+			svm.apertures[SVM_DEFAULT].limit);
+	} else {
+		pr_info("Failed to allocate unreserved SVM address space.\n");
+		pr_info("Falling back to reserved SVM apertures.\n");
+	}
+
+	return addr ? HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_ERROR;
+}
+
 static void *reserve_address(void *addr, unsigned long long int len)
 {
 	void *ret_addr;
@@ -1597,8 +1717,26 @@ static HSAKMT_STATUS init_svm_apertures(HSAuint64 base, HSAuint64 limit,
 	if (dgpu_shared_aperture_limit)
 		return HSAKMT_STATUS_SUCCESS;

+	/* Align base and limit to huge page size */
 	base = ALIGN_UP(base, GPU_HUGE_PAGE_SIZE);
 	limit = ((limit + 1) & ~(HSAuint64)(GPU_HUGE_PAGE_SIZE - 1)) - 1;
+
+	/* If the limit is greater or equal 47-bits of address space,
+	 * it means we have GFXv9 or later GPUs only. We don't need
+	 * apertures to determine the MTYPE and the virtual address
+	 * space of the GPUs covers the full CPU address range (on
+	 * x86_64) or at least mmap is unlikely to run out of
+	 * addresses the GPUs can handle.
+	 */
+	if (limit >= (1ULL << 47) - 1) {
+		HSAKMT_STATUS status = init_mmap_apertures(base, limit, align,
+							   guard_pages);
+
+		if (status == HSAKMT_STATUS_SUCCESS)
+			return status;
+		/* fall through: fall back to reserved address space */
+	}
+
 	if (limit > SVM_RESERVATION_LIMIT)
 		limit = SVM_RESERVATION_LIMIT;
 	if (base >= limit) {
@@ -3431,10 +3569,10 @@ void fmm_clear_all_mem(void)
 		fmm_clear_aperture(&gpu_mem[i].scratch_physical);
 	}

-	if (dgpu_shared_aperture_limit) {
-		fmm_clear_aperture(&svm.apertures[SVM_DEFAULT]);
-		fmm_clear_aperture(&svm.apertures[SVM_COHERENT]);
+	fmm_clear_aperture(&svm.apertures[SVM_DEFAULT]);
+	fmm_clear_aperture(&svm.apertures[SVM_COHERENT]);

+	if (dgpu_shared_aperture_limit) {
 		/* Use the same dgpu range as the parent. If failed, then set
 		 * is_dgpu_mem_init to false. Later on dgpu_mem_init will try
 		 * to get a new range