diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
index 5d6df38f95..5ac51c2208 100644
--- a/include/hsakmttypes.h
+++ b/include/hsakmttypes.h
@@ -533,7 +533,8 @@ typedef struct _HsaMemFlags
                                             // The KFD will ensure that the memory returned is allocated in the optimal memory location
                                             // and optimal alignment requirements
             unsigned int FixedAddress : 1; // Allocate memory at specified virtual address. Fail if address is not free.
-            unsigned int Reserved    : 16;
+            unsigned int NoNUMABind: 1;    // Don't bind system memory to a specific NUMA node
+            unsigned int Reserved    : 15;
 
         } ui32;
         HSAuint32 Value;
diff --git a/src/fmm.c b/src/fmm.c
index e7c83d3fd3..a3f6e7b6a2 100644
--- a/src/fmm.c
+++ b/src/fmm.c
@@ -1397,7 +1397,7 @@ void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes,
 	flags.Value = 0;
 	flags.ui32.NonPaged = 1;
 	flags.ui32.HostAccess = 1;
-	flags.ui32.Reserved = 0xBe11;
+	flags.ui32.Reserved = 0xBe1;
 
 	pthread_mutex_lock(&aperture->fmm_mutex);
 	vm_obj->flags = flags.Value;
@@ -1462,10 +1462,13 @@ static int bind_mem_to_numa(uint32_t node_id, void *mem,
 	int num_node;
 	long r;
 
+	if (flags.ui32.NoNUMABind)
+		return 0;
+
 	if (numa_available() == -1)
 		return 0;
 
-	num_node = numa_num_task_nodes();
+	num_node = numa_max_node() + 1; /* numa_max_node() is the highest node ID; +1 keeps num_node a count so that ID passes the check below */
 
 	/* Ignore binding requests to invalid nodes IDs */
 	if (node_id >= (unsigned)num_node) {
diff --git a/src/queues.c b/src/queues.c
index e0cca33c58..8505bdbb1a 100644
--- a/src/queues.c
+++ b/src/queues.c
@@ -424,6 +424,7 @@ void *allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
 	flags.ui32.NonPaged = nonPaged;
 	flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
 	flags.ui32.CoarseGrain = DeviceLocal;
+	flags.ui32.NoNUMABind = 1;
 
 	size = ALIGN_UP(size, align);
 