From ac547f8cb2bd9bb90754d178a75e313450a0d700 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Thu, 3 Mar 2016 18:31:17 -0500 Subject: [PATCH] Add reverse direct io_links The Kernel only creates one way direct link - GPU(PCI_BUS) --> [Parent NUMA Node] Create the reverse direct io_link here - [Parent NUMA Node] --> GPU(PCI_BUS) Change-Id: I829a1b1b7f34bda42871ede3472d60915e88418c [ROCm/ROCR-Runtime commit: 1d1c30db7cde97fba7f3b72b10708f3c1c5d85d9] --- projects/rocr-runtime/src/topology.c | 64 ++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/projects/rocr-runtime/src/topology.c b/projects/rocr-runtime/src/topology.c index f22586e198..b1510ae0ab 100644 --- a/projects/rocr-runtime/src/topology.c +++ b/projects/rocr-runtime/src/topology.c @@ -845,6 +845,29 @@ err1: return ret; } +/* topology_create_reverse_io_link - Create io_links from the given CPU + * NUMA node to all the GPUs attached to that node + */ +static void topology_create_reverse_io_link(uint32_t cpu_node, + const HsaSystemProperties *sys_props, node_t *temp_nodes) +{ + unsigned int gpu_node; + HsaIoLinkProperties *props = temp_nodes[cpu_node].link; + + for (gpu_node = 0; gpu_node < sys_props->NumNodes; gpu_node++) { + if (temp_nodes[gpu_node].gpu_id != 0) { + /* Check if this GPU is connected to the give cpu_node, + * if so create an io_link */ + if (temp_nodes[gpu_node].link->NodeTo == cpu_node) { + props->NodeFrom = cpu_node; + props->NodeTo = gpu_node; + props->Weight = temp_nodes[gpu_node].link->Weight; + props++; + } + } + } +} + HSAKMT_STATUS topology_take_snapshot(void) { @@ -931,6 +954,47 @@ retry: } } + /* The Kernel only creates one way direct link - + * GPU(PCI_BUS) --> Parent NUMA Node. Create the reverse direct + * io_link here. [NUMA node] --> GPU */ + + for (i = 0; i < sys_props.NumNodes; i++) { + /* For each CPU Node, compute the number of direct io_links it has. + * For that, parse all the GPU Nodes, find the CPU Parent node to + * which it has a direct link to. And increment NumIOLinks for that + * CPU node */ + if (temp_nodes[i].gpu_id != 0) { + if (temp_nodes[i].link) { + if (temp_nodes[i].link->NodeTo < sys_props.NumNodes) + temp_nodes[temp_nodes[i].link->NodeTo].node.NumIOLinks++; + else + printf("Node [%d] has io_link to invalid Node [%d]\n", + i, temp_nodes[i].link->NodeTo); + } + else + printf("GPU [0x%x] is missing its direct IO LINK\n", + temp_nodes[i].gpu_id); + } + } + + /* Create the reverse io_link for all the CPU nodes */ + for (i = 0; i < sys_props.NumNodes; i++) { + if (temp_nodes[i].gpu_id == 0) { + if (temp_nodes[i].link) { + printf("Node [%d] has unexpected io_link. Skipping.\n", i); + continue; + } + temp_nodes[i].link = calloc(temp_nodes[i].node.NumIOLinks, + sizeof(HsaIoLinkProperties)); + if (!temp_nodes[i].link) { + ret = HSAKMT_STATUS_NO_MEMORY; + free_nodes(temp_nodes, i + 1); + goto err; + } + topology_create_reverse_io_link(i, &sys_props, temp_nodes); + } + } + ret = topology_sysfs_get_generation(&gen_end); if (ret != HSAKMT_STATUS_SUCCESS) { free_nodes(temp_nodes, sys_props.NumNodes);