From 42392f093ff6ec885ba003289e8bf37befde8341 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 13 Sep 2019 16:04:36 -0400 Subject: [PATCH] libhsakmt: handle NUMA system with no memory on node 0 On a NUMA system, node 0 may have no memory. An application that passes node id 0 to hsaKmtAllocMemory will then fail, because the mbind call that requests the allocation from node 0 returns EINVAL. Add a new flag, NoNUMABind, that an application can pass to hsaKmtAllocMemory to skip mbind. hsaKmtCreateEvent and hsaKmtCreateQueue set the new NoNUMABind flag when allocating system memory for the event page and CWSR area, so that this system memory is not bound to a specific NUMA node. Change-Id: I854e5a57502c7807c4c5ff2e441d499ae515c309 Signed-off-by: Philip Yang --- include/hsakmttypes.h | 3 ++- src/fmm.c | 7 +++++-- src/queues.c | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h index 5d6df38f95..5ac51c2208 100644 --- a/include/hsakmttypes.h +++ b/include/hsakmttypes.h @@ -533,7 +533,8 @@ typedef struct _HsaMemFlags // The KFD will ensure that the memory returned is allocated in the optimal memory location // and optimal alignment requirements unsigned int FixedAddress : 1; // Allocate memory at specified virtual address. Fail if address is not free. 
- unsigned int Reserved : 16; + unsigned int NoNUMABind: 1; // Don't bind system memory to a specific NUMA node + unsigned int Reserved : 15; } ui32; HSAuint32 Value; diff --git a/src/fmm.c b/src/fmm.c index e7c83d3fd3..a3f6e7b6a2 100644 --- a/src/fmm.c +++ b/src/fmm.c @@ -1397,7 +1397,7 @@ void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, flags.Value = 0; flags.ui32.NonPaged = 1; flags.ui32.HostAccess = 1; - flags.ui32.Reserved = 0xBe11; + flags.ui32.Reserved = 0xBe1; pthread_mutex_lock(&aperture->fmm_mutex); vm_obj->flags = flags.Value; @@ -1462,10 +1462,13 @@ static int bind_mem_to_numa(uint32_t node_id, void *mem, int num_node; long r; + if (flags.ui32.NoNUMABind) + return 0; + if (numa_available() == -1) return 0; - num_node = numa_num_task_nodes(); + num_node = numa_max_node(); /* Ignore binding requests to invalid nodes IDs */ if (node_id >= (unsigned)num_node) { diff --git a/src/queues.c b/src/queues.c index e0cca33c58..8505bdbb1a 100644 --- a/src/queues.c +++ b/src/queues.c @@ -424,6 +424,7 @@ void *allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, flags.ui32.NonPaged = nonPaged; flags.ui32.PageSize = HSA_PAGE_SIZE_4KB; flags.ui32.CoarseGrain = DeviceLocal; + flags.ui32.NoNUMABind = 1; size = ALIGN_UP(size, align);