From 87aca673e8a9da2bd93a49dc34db2a08e613b008 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 13 Jul 2022 16:51:56 -0400 Subject: [PATCH] libhsakmt: Init apertures in AcquireSystemProperties This allows init_process_apertures to use the whole consistent topoology instead of taking its own partial snapshot. Signed-off-by: Felix Kuehling Change-Id: Ia13e7aa7fcd090ea8d6cacd4babb29a27c20207f --- src/fmm.c | 2 +- src/libhsakmt.h | 4 +-- src/openclose.c | 14 ---------- src/topology.c | 73 ++++++++++++++++++++++++++++++++----------------- 4 files changed, 51 insertions(+), 42 deletions(-) diff --git a/src/fmm.c b/src/fmm.c index 92b76e1a19..97d03cfdbd 100644 --- a/src/fmm.c +++ b/src/fmm.c @@ -2234,7 +2234,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) for (i = 0; i < NumNodes; i++) { memset(&props, 0, sizeof(props)); - ret = topology_sysfs_get_node_props(i, &props, NULL, NULL); + ret = topology_get_node_props(i, &props); if (ret != HSAKMT_STATUS_SUCCESS) goto sysfs_parse_failed; diff --git a/src/libhsakmt.h b/src/libhsakmt.h index 822744b6b7..b3cbea7b4f 100644 --- a/src/libhsakmt.h +++ b/src/libhsakmt.h @@ -172,9 +172,9 @@ int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id); HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array, uint32_t NumberOfNodes, uint32_t *NodeArray); -HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, - bool *p2p_links, uint32_t *num_p2pLinks); HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props); +HSAKMT_STATUS topology_get_node_props(HSAuint32 NodeId, + HsaNodeProperties *NodeProperties); void topology_setup_is_dgpu_param(HsaNodeProperties *props); bool topology_is_svm_needed(HSA_ENGINE_ID EngineId); diff --git a/src/openclose.c b/src/openclose.c index 19dbf65894..d52868e963 100644 --- a/src/openclose.c +++ b/src/openclose.c @@ -179,14 +179,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) if (result != HSAKMT_STATUS_SUCCESS) goto topology_sysfs_failed; - result = fmm_init_process_apertures(sys_props.NumNodes); - if (result != HSAKMT_STATUS_SUCCESS) - goto init_process_aperture_failed; - - result = init_process_doorbells(sys_props.NumNodes); - if (result != HSAKMT_STATUS_SUCCESS) - goto init_doorbell_failed; - kfd_open_count = 1; if (init_device_debugging_memory(sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS) @@ -212,10 +204,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) pthread_mutex_unlock(&hsakmt_mutex); return result; - -init_doorbell_failed: - fmm_destroy_process_apertures(); -init_process_aperture_failed: topology_sysfs_failed: kfd_version_failed: close(fd); @@ -235,8 +223,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void) if (--kfd_open_count == 0) { destroy_counter_props(); destroy_device_debugging_memory(); - destroy_process_doorbells(); - fmm_destroy_process_apertures(); if (kfd_fd) { close(kfd_fd); kfd_fd = 0; diff --git a/src/topology.c b/src/topology.c index 99a6a03c89..ed8f0d9dd7 100644 --- a/src/topology.c +++ b/src/topology.c @@ -87,7 +87,7 @@ static const char *supported_processor_vendor_name[] = { }; static HSAKMT_STATUS topology_take_snapshot(void); -static HSAKMT_STATUS topology_drop_snapshot(void); +static void topology_drop_snapshot(void); static const struct hsa_gfxip_table gfxip_lookup_table[] = { /* Kaveri Family */ @@ -1034,10 +1034,10 @@ static int topology_get_marketing_name(int minor, uint16_t *marketing_name) return 0; } -HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, - HsaNodeProperties *props, - bool *p2p_links, - uint32_t *num_p2pLinks) +static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, + HsaNodeProperties *props, + bool *p2p_links, + uint32_t *num_p2pLinks) { FILE *fd; char *read_buf, *p, *envvar, dummy; @@ -2066,15 +2066,10 @@ err: } /* Drop the Snashot of the HSA topology information. Assume lock is held. */ -HSAKMT_STATUS topology_drop_snapshot(void) +void topology_drop_snapshot(void) { - HSAKMT_STATUS err; - - if (!!g_system != !!g_props) { + if (!!g_system != !!g_props) pr_warn("Probably inconsistency?\n"); - err = HSAKMT_STATUS_SUCCESS; - goto out; - } if (g_props) { /* Remove state */ @@ -2090,11 +2085,6 @@ HSAKMT_STATUS topology_drop_snapshot(void) map_user_to_sysfs_node_id = NULL; map_user_to_sysfs_node_id_size = 0; } - - err = HSAKMT_STATUS_SUCCESS; - -out: - return err; } HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id) @@ -2124,7 +2114,7 @@ HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id) HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties) { - HSAKMT_STATUS err; + HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS; CHECK_KFD_OPEN(); @@ -2133,14 +2123,36 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste pthread_mutex_lock(&hsakmt_mutex); + /* We already have a valid snapshot. Avoid double initialization that + * would leak memory. + */ + if (g_system) { + *SystemProperties = *g_system; + goto out; + } + err = topology_take_snapshot(); if (err != HSAKMT_STATUS_SUCCESS) goto out; assert(g_system); + err = fmm_init_process_apertures(g_system->NumNodes); + if (err != HSAKMT_STATUS_SUCCESS) + goto init_process_apertures_failed; + + err = init_process_doorbells(g_system->NumNodes); + if (err != HSAKMT_STATUS_SUCCESS) + goto init_doorbells_failed; + *SystemProperties = *g_system; - err = HSAKMT_STATUS_SUCCESS; + + goto out; + +init_doorbells_failed: + fmm_destroy_process_apertures(); +init_process_apertures_failed: + topology_drop_snapshot(); out: pthread_mutex_unlock(&hsakmt_mutex); @@ -2149,15 +2161,25 @@ out: HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void) { - HSAKMT_STATUS err; - pthread_mutex_lock(&hsakmt_mutex); - err = topology_drop_snapshot(); + destroy_process_doorbells(); + fmm_destroy_process_apertures(); + topology_drop_snapshot(); pthread_mutex_unlock(&hsakmt_mutex); - return err; + return HSAKMT_STATUS_SUCCESS; +} + +HSAKMT_STATUS topology_get_node_props(HSAuint32 NodeId, + HsaNodeProperties *NodeProperties) +{ + if (!g_system || !g_props || NodeId >= g_system->NumNodes) + return HSAKMT_STATUS_ERROR; + + *NodeProperties = g_props[NodeId].node; + return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId, @@ -2176,7 +2198,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId, if (err != HSAKMT_STATUS_SUCCESS) goto out; - *NodeProperties = g_props[NodeId].node; + err = topology_get_node_props(NodeId, NodeProperties); + if (err != HSAKMT_STATUS_SUCCESS) + goto out; /* For CPU only node don't add any additional GPU memory banks. */ if (gpu_id) { uint64_t base, limit; @@ -2188,7 +2212,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId, &limit) == HSAKMT_STATUS_SUCCESS) NodeProperties->NumMemoryBanks += 1; } - err = HSAKMT_STATUS_SUCCESS; out: pthread_mutex_unlock(&hsakmt_mutex);