diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index 1a0ff6d473..7fe1b40c5e 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -19,6 +19,163 @@ Added `amdsmi_get_gpu_mem_overdrive_level()` function to amd-smi C and Python Li - **Added Subsystem Device ID to `amd-smi static --asic`**. No underlying changes to amdsmi_get_gpu_asic_info +- **Added retrieving connection type and P2P capabilities between two GPUs**. + - Added `amdsmi_topo_get_p2p_status` function to amd-smi C and Python Libraries. + - Added retrieving P2P link capabilities to CLI `amd-smi topology`. + +```shell +$ amd-smi topology -h +usage: amd-smi topology [-h] [--json | --csv] [--file FILE] [--loglevel LEVEL] + [-g GPU [GPU ...]] [-a] [-w] [-o] [-t] [-b] + +If no GPU is specified, returns information for all GPUs on the system. +If no topology argument is provided all topology information will be displayed. + +Topology arguments: + -h, --help show this help message and exit + -g, --gpu GPU [GPU ...] 
Select a GPU ID, BDF, or UUID from the possible choices: + ID: 0 | BDF: 0000:0c:00.0 | UUID: 5fff74a1-0000-1000-808c-324a4d24b37e + ID: 1 | BDF: 0000:22:00.0 | UUID: 06ff74a1-0000-1000-80d3-f5e97636ae62 + ID: 2 | BDF: 0000:38:00.0 | UUID: 87ff74a1-0000-1000-80a0-d0a45576c5ed + ID: 3 | BDF: 0000:5c:00.0 | UUID: 5dff74a1-0000-1000-8054-a29c595fd7f3 + ID: 4 | BDF: 0000:9f:00.0 | UUID: a8ff74a1-0000-1000-805b-92615ca9e7b4 + ID: 5 | BDF: 0000:af:00.0 | UUID: ddff74a1-0000-1000-809e-5a98a60013bd + ID: 6 | BDF: 0000:bf:00.0 | UUID: 9aff74a1-0000-1000-80e8-cbefaf9f72c3 + ID: 7 | BDF: 0000:df:00.0 | UUID: 48ff74a1-0000-1000-806e-3c0b30d78e00 + all | Selects all devices + + + -a, --access Displays link accessibility between GPUs + -w, --weight Displays relative weight between GPUs + -o, --hops Displays the number of hops between GPUs + -t, --link-type Displays the link type between GPUs + -b, --numa-bw Display max and min bandwidth between nodes + -c, --coherent Display cache coherant (or non-coherant) link capability between nodes + -n, --atomics Display 32 and 64-bit atomic io link capability between nodes + -d, --dma Display P2P direct memory access (DMA) link capability between nodes + -z, --bi-dir Display P2P bi-directional link capability between nodes + + +Command Modifiers: + --json Displays output in JSON format (human readable by default). + --csv Displays output in CSV format (human readable by default). + --file FILE Saves output into a file on the provided path (stdout by default). 
+ --loglevel LEVEL Set the logging level from the possible choices: + DEBUG, INFO, WARNING, ERROR, CRITICAL +``` + +```shell +$ amd-smi topology +ACCESS TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:22:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:38:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:5c:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:9f:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:af:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:bf:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED +0000:df:00.0 ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED ENABLED + +WEIGHT TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 0 15 15 15 15 15 15 15 +0000:22:00.0 15 0 15 15 15 15 15 15 +0000:38:00.0 15 15 0 15 15 15 15 15 +0000:5c:00.0 15 15 15 0 15 15 15 15 +0000:9f:00.0 15 15 15 15 0 15 15 15 +0000:af:00.0 15 15 15 15 15 0 15 15 +0000:bf:00.0 15 15 15 15 15 15 0 15 +0000:df:00.0 15 15 15 15 15 15 15 0 + +HOPS TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 0 1 1 1 1 1 1 1 +0000:22:00.0 1 0 1 1 1 1 1 1 +0000:38:00.0 1 1 0 1 1 1 1 1 +0000:5c:00.0 1 1 1 0 1 1 1 1 +0000:9f:00.0 1 1 1 1 0 1 1 1 +0000:af:00.0 1 1 1 1 1 0 1 1 +0000:bf:00.0 1 1 1 1 1 1 0 1 +0000:df:00.0 1 1 1 1 1 1 1 0 + +LINK TYPE TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 SELF XGMI XGMI XGMI XGMI XGMI XGMI XGMI +0000:22:00.0 XGMI SELF XGMI XGMI XGMI XGMI XGMI XGMI +0000:38:00.0 XGMI XGMI SELF XGMI XGMI XGMI XGMI XGMI +0000:5c:00.0 XGMI XGMI XGMI SELF XGMI 
XGMI XGMI XGMI +0000:9f:00.0 XGMI XGMI XGMI XGMI SELF XGMI XGMI XGMI +0000:af:00.0 XGMI XGMI XGMI XGMI XGMI SELF XGMI XGMI +0000:bf:00.0 XGMI XGMI XGMI XGMI XGMI XGMI SELF XGMI +0000:df:00.0 XGMI XGMI XGMI XGMI XGMI XGMI XGMI SELF + +NUMA BW TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 N/A 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 +0000:22:00.0 50000-50000 N/A 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 +0000:38:00.0 50000-50000 50000-50000 N/A 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 +0000:5c:00.0 50000-50000 50000-50000 50000-50000 N/A 50000-50000 50000-50000 50000-50000 50000-50000 +0000:9f:00.0 50000-50000 50000-50000 50000-50000 50000-50000 N/A 50000-50000 50000-50000 50000-50000 +0000:af:00.0 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 N/A 50000-50000 50000-50000 +0000:bf:00.0 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 N/A 50000-50000 +0000:df:00.0 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 50000-50000 N/A + +CACHE COHERANCY TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 SELF C NC NC C C C NC +0000:22:00.0 C SELF NC C C C NC C +0000:38:00.0 NC NC SELF C C NC C NC +0000:5c:00.0 NC C C SELF NC C NC NC +0000:9f:00.0 C C C NC SELF NC NC C +0000:af:00.0 C C NC C NC SELF C C +0000:bf:00.0 C NC C NC NC C SELF NC +0000:df:00.0 NC C NC NC C C NC SELF + +ATOMICS TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 SELF 64,32 64,32 64 32 32 N/A 64,32 +0000:22:00.0 64,32 SELF 64 32 32 N/A 64,32 64,32 +0000:38:00.0 64,32 64 SELF 32 N/A 64,32 64,32 64,32 +0000:5c:00.0 64 32 32 SELF 64,32 64,32 64,32 32 +0000:9f:00.0 32 32 N/A 64,32 SELF 64,32 32 32 +0000:af:00.0 32 N/A 64,32 64,32 
64,32 SELF 32 N/A +0000:bf:00.0 N/A 64,32 64,32 64,32 32 32 SELF 64,32 +0000:df:00.0 64,32 64,32 64,32 32 32 N/A 64,32 SELF + +DMA TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 SELF T T F F T F T +0000:22:00.0 T SELF F F T F T T +0000:38:00.0 T F SELF T F T T T +0000:5c:00.0 F F T SELF T T T F +0000:9f:00.0 F T F T SELF T F F +0000:af:00.0 T F T T T SELF F T +0000:bf:00.0 F T T T F F SELF F +0000:df:00.0 T T T F F T F SELF + +BI-DIRECTIONAL TABLE: + 0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0 +0000:0c:00.0 SELF T T F F T F T +0000:22:00.0 T SELF F F T F T T +0000:38:00.0 T F SELF T F T T T +0000:5c:00.0 F F T SELF T T T F +0000:9f:00.0 F T F T SELF T F F +0000:af:00.0 T F T T T SELF F T +0000:bf:00.0 F T T T F F SELF F +0000:df:00.0 T T T F F T F SELF + + +Legend: + SELF = Current GPU + ENABLED / DISABLED = Link is enabled or disabled + N/A = Not supported + T/F = True / False + C/NC = Coherant / Non-Coherant io links + 64,32 = 64 bit and 32 bit atomic support + - +``` + ### Removals - N/A diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index b955f8b4ab..c3b1a158da 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -2822,7 +2822,8 @@ class AMDSMICommands(): def topology(self, args, multiple_devices=False, gpu=None, access=None, - weight=None, hops=None, link_type=None, numa_bw=None): + weight=None, hops=None, link_type=None, numa_bw=None, + coherent=None, atomics=None, dma=None, bi_dir=None): """ Get topology information for target gpus params: args - argparser args to pass to subcommand @@ -2833,6 +2834,10 @@ class AMDSMICommands(): hops (bool) - Value override for args.hops type (bool) - Value override for args.type numa_bw (bool) - Value override for args.numa_bw + coherent (bool) - Value override for 
args.coherent + atomics (bool) - Value override for args.atomics + dma (bool) - Value override for args.dma + bi_dir (bool) - Value override for args.bi_dir return: Nothing """ @@ -2849,6 +2854,14 @@ class AMDSMICommands(): args.link_type = link_type if numa_bw: args.numa_bw = numa_bw + if coherent: + args.coherent = coherent + if atomics: + args.atomics = atomics + if dma: + args.dma = dma + if bi_dir: + args.bi_dir = bi_dir # Handle No GPU passed if args.gpu == None: @@ -2858,8 +2871,10 @@ class AMDSMICommands(): args.gpu = [args.gpu] # Handle all args being false - if not any([args.access, args.weight, args.hops, args.link_type, args.numa_bw]): - args.access = args.weight = args.hops = args.link_type= args.numa_bw = True + if not any([args.access, args.weight, args.hops, args.link_type, args.numa_bw, + args.coherent, args.atomics, args.dma, args.bi_dir]): + args.access = args.weight = args.hops = args.link_type= args.numa_bw = \ + args.coherent = args.atomics = args.dma = args.bi_dir = True # Clear the table header self.logger.table_header = ''.rjust(12) @@ -2890,6 +2905,10 @@ class AMDSMICommands(): # "num_hops": num_hops - # of hops between devices # "bandwidth": numa_bw - The NUMA "minimum bandwidth-maximum bandwidth" beween src and dest nodes # "N/A" - self node or not connected devices + # "coherent": coherent - Coherant / Non-Coherant io links + # "atomics": atomics - 32 and 64-bit atomic io link capability between nodes + # "dma": dma - P2P direct memory access (DMA) link capability between nodes + # "bi_dir": bi_dir - P2P bi-directional link capability between nodes # } for dest_gpu_index, dest_gpu in enumerate(args.gpu): @@ -2928,6 +2947,42 @@ class AMDSMICommands(): else: link_status = "DISABLED" + link_coherent = "SELF" + link_atomics = "SELF" + link_dma = "SELF" + link_bi_dir = "SELF" + + if src_gpu != dest_gpu: + try: + cap = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap'] + link_coherent = ( + "C" if cap['is_iolink_coherent'] 
== 1 else + "NC" if cap['is_iolink_coherent'] == 0 else + "N/A" + ) + link_atomics = ( + "64,32" if cap['is_iolink_atomics_32bit'] == 1 and cap['is_iolink_atomics_64bit'] == 1 else + "32" if cap['is_iolink_atomics_32bit'] == 1 else + "64" if cap['is_iolink_atomics_64bit'] == 1 else + "N/A" + ) + link_dma = ( + "T" if cap['is_iolink_dma'] == 1 else + "F" if cap['is_iolink_dma'] == 0 else + "N/A" + ) + link_bi_dir = ( + "T" if cap['is_iolink_bi_directional'] == 1 else + "F" if cap['is_iolink_bi_directional'] == 0 else + "N/A" + ) + except amdsmi_exception.AmdSmiLibraryException as e: + logging.debug("Failed to get link status for %s to %s | %s", + self.helpers.get_gpu_id_from_device_handle(src_gpu), + self.helpers.get_gpu_id_from_device_handle(dest_gpu), + e.get_error_info()) + + # link_status = amdsmi_is_P2P_accessible(src,dest) dest_gpu_links = { "gpu": self.helpers.get_gpu_id_from_device_handle(dest_gpu), @@ -2937,6 +2992,10 @@ class AMDSMICommands(): "link_type": link_type, "num_hops": num_hops, "bandwidth": numa_bw, + "coherent": link_coherent, + "atomics": link_atomics, + "dma": link_dma, + "bi_dir": link_bi_dir } if not args.access: del dest_gpu_links['link_status'] @@ -2948,6 +3007,14 @@ class AMDSMICommands(): del dest_gpu_links['num_hops'] if not args.numa_bw: del dest_gpu_links['bandwidth'] + if not args.coherent: + del dest_gpu_links['coherent'] + if not args.atomics: + del dest_gpu_links['atomics'] + if not args.dma: + del dest_gpu_links['dma'] + if not args.bi_dir: + del dest_gpu_links['bi_dir'] links.append(dest_gpu_links) dest_end = dest_gpu_index+1 == len(args.gpu) isEndOfSrc = src_gpu_index+1 == len(args.gpu) @@ -3165,6 +3232,175 @@ class AMDSMICommands(): self.logger.table_title = "NUMA BW TABLE" self.logger.print_output(multiple_device_enabled=True, tabular=True) + if args.coherent: + tabular_output = [] + for src_gpu_index, src_gpu in enumerate(args.gpu): + src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu) + if 
self.logger.is_human_readable_format(): + tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "} + else: + tabular_output_dict = {'gpu' : src_gpu_bdf} + src_gpu_coherent = {} + for dest_gpu in args.gpu: + dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu) + dest_gpu_key = f'gpu_{dest_gpu_id}' + + if src_gpu == dest_gpu: + src_gpu_coherent[dest_gpu_key] = "SELF" + continue + try: + iolink_coherent = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']['is_iolink_coherent'] + src_gpu_coherent[dest_gpu_key] = "C" if iolink_coherent == 1 else "NC" if iolink_coherent == 0 else "N/A" + except amdsmi_exception.AmdSmiLibraryException as e: + src_gpu_coherent[dest_gpu_key] = "N/A" + logging.debug("Failed to get link coherent for %s to %s | %s", + self.helpers.get_gpu_id_from_device_handle(src_gpu), + self.helpers.get_gpu_id_from_device_handle(dest_gpu), + e.get_error_info()) + + topo_values[src_gpu_index]['coherent'] = src_gpu_coherent + + tabular_output_dict.update(src_gpu_coherent) + tabular_output.append(tabular_output_dict) + + if self.logger.is_human_readable_format(): + self.logger.multiple_device_output = tabular_output + self.logger.table_title = "CACHE COHERANCY TABLE" + self.logger.print_output(multiple_device_enabled=True, tabular=True) + + if args.atomics: + tabular_output = [] + for src_gpu_index, src_gpu in enumerate(args.gpu): + src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu) + if self.logger.is_human_readable_format(): + tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "} + else: + tabular_output_dict = {'gpu' : src_gpu_bdf} + src_gpu_atomics = {} + for dest_gpu in args.gpu: + dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu) + dest_gpu_key = f'gpu_{dest_gpu_id}' + + if src_gpu == dest_gpu: + src_gpu_atomics[dest_gpu_key] = "SELF" + continue + try: + cap = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap'] + src_gpu_atomics[dest_gpu_key] = ( + "64,32" if 
cap['is_iolink_atomics_32bit'] == 1 and cap['is_iolink_atomics_64bit'] == 1 else + "32" if cap['is_iolink_atomics_32bit'] == 1 else + "64" if cap['is_iolink_atomics_64bit'] == 1 else + "N/A" + ) + except amdsmi_exception.AmdSmiLibraryException as e: + src_gpu_atomics[dest_gpu_key] = "N/A" + logging.debug("Failed to get link atomics for %s to %s | %s", + self.helpers.get_gpu_id_from_device_handle(src_gpu), + self.helpers.get_gpu_id_from_device_handle(dest_gpu), + e.get_error_info()) + + topo_values[src_gpu_index]['atomics'] = src_gpu_atomics + + tabular_output_dict.update(src_gpu_atomics) + tabular_output.append(tabular_output_dict) + + if self.logger.is_human_readable_format(): + self.logger.multiple_device_output = tabular_output + self.logger.table_title = "ATOMICS TABLE" + self.logger.print_output(multiple_device_enabled=True, tabular=True) + + if args.dma: + tabular_output = [] + for src_gpu_index, src_gpu in enumerate(args.gpu): + src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu) + if self.logger.is_human_readable_format(): + tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "} + else: + tabular_output_dict = {'gpu' : src_gpu_bdf} + src_gpu_dma = {} + for dest_gpu in args.gpu: + dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu) + dest_gpu_key = f'gpu_{dest_gpu_id}' + + if src_gpu == dest_gpu: + src_gpu_dma[dest_gpu_key] = "SELF" + continue + try: + iolink_dma = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']['is_iolink_dma'] + src_gpu_dma[dest_gpu_key] = "T" if iolink_dma == 1 else "F" if iolink_dma == 0 else "N/A" + except amdsmi_exception.AmdSmiLibraryException as e: + src_gpu_dma[dest_gpu_key] = "N/A" + logging.debug("Failed to get link dma for %s to %s | %s", + self.helpers.get_gpu_id_from_device_handle(src_gpu), + self.helpers.get_gpu_id_from_device_handle(dest_gpu), + e.get_error_info()) + + topo_values[src_gpu_index]['dma'] = src_gpu_dma + + tabular_output_dict.update(src_gpu_dma) + 
tabular_output.append(tabular_output_dict) + + if self.logger.is_human_readable_format(): + self.logger.multiple_device_output = tabular_output + self.logger.table_title = "DMA TABLE" + self.logger.print_output(multiple_device_enabled=True, tabular=True) + + if args.bi_dir: + tabular_output = [] + for src_gpu_index, src_gpu in enumerate(args.gpu): + src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu) + if self.logger.is_human_readable_format(): + tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "} + else: + tabular_output_dict = {'gpu' : src_gpu_bdf} + src_gpu_bi_dir = {} + for dest_gpu in args.gpu: + dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu) + dest_gpu_key = f'gpu_{dest_gpu_id}' + + if src_gpu == dest_gpu: + src_gpu_bi_dir[dest_gpu_key] = "SELF" + continue + try: + iolink_bi_dir = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']['is_iolink_bi_directional'] + src_gpu_bi_dir[dest_gpu_key] = "T" if iolink_bi_dir == 1 else "F" if iolink_bi_dir == 0 else "N/A" + except amdsmi_exception.AmdSmiLibraryException as e: + src_gpu_bi_dir[dest_gpu_key] = "N/A" + logging.debug("Failed to get link bi-directional for %s to %s | %s", + self.helpers.get_gpu_id_from_device_handle(src_gpu), + self.helpers.get_gpu_id_from_device_handle(dest_gpu), + e.get_error_info()) + + topo_values[src_gpu_index]['bi_dir'] = src_gpu_bi_dir + + tabular_output_dict.update(src_gpu_bi_dir) + tabular_output.append(tabular_output_dict) + + if self.logger.is_human_readable_format(): + self.logger.multiple_device_output = tabular_output + self.logger.table_title = "BI-DIRECTIONAL TABLE" + self.logger.print_output(multiple_device_enabled=True, tabular=True) + + if self.logger.is_human_readable_format(): + # Populate the legend output + legend_parts = [ + "\n\nLegend:", + " SELF = Current GPU", + " ENABLED / DISABLED = Link is enabled or disabled", + " N/A = Not supported", + " T/F = True / False", + " C/NC = Coherant / Non-Coherant io links", + " 
64,32 = 64 bit and 32 bit atomic support", + " -" + ] + legend_output = "\n".join(legend_parts) + + if self.logger.destination == 'stdout': + print(legend_output) + else: + with self.logger.destination.open('a', encoding="utf-8") as output_file: + output_file.write(legend_output + '\n') + self.logger.multiple_device_output = topo_values if self.logger.is_csv_format(): diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 0a85ee7b01..ae1432526f 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -972,6 +972,10 @@ class AMDSMIParser(argparse.ArgumentParser): hops_help = "Displays the number of hops between GPUs" link_type_help = "Displays the link type between GPUs" numa_bw_help = "Display max and min bandwidth between nodes" + coherent_help = "Display cache coherant (or non-coherant) link capability between nodes" + atomics_help = "Display 32 and 64-bit atomic io link capability between nodes" + dma_help = "Display P2P direct memory access (DMA) link capability between nodes" + bi_dir_help = "Display P2P bi-directional link capability between nodes" # Create topology subparser topology_parser = subparsers.add_parser('topology', help=topology_help, description=topology_subcommand_help) @@ -989,6 +993,10 @@ class AMDSMIParser(argparse.ArgumentParser): topology_parser.add_argument('-o', '--hops', action='store_true', required=False, help=hops_help) topology_parser.add_argument('-t', '--link-type', action='store_true', required=False, help=link_type_help) topology_parser.add_argument('-b', '--numa-bw', action='store_true', required=False, help=numa_bw_help) + topology_parser.add_argument('-c', '--coherent', action='store_true', required=False, help=coherent_help) + topology_parser.add_argument('-n', '--atomics', action='store_true', required=False, help=atomics_help) + topology_parser.add_argument('-d', '--dma', action='store_true', required=False, 
help=dma_help) + topology_parser.add_argument('-z', '--bi-dir', action='store_true', required=False, help=bi_dir_help) def _add_set_value_parser(self, subparsers, func): diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index 9e5b988614..e9c415339d 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -690,6 +690,17 @@ typedef struct { uint32_t reserved[4]; } amdsmi_proc_info_t; +/** + * @brief IO Link P2P Capability + */ +typedef struct { + uint8_t is_iolink_coherent; // 1 = true, 0 = false, UINT8_MAX = Not defined. + uint8_t is_iolink_atomics_32bit; + uint8_t is_iolink_atomics_64bit; + uint8_t is_iolink_dma; + uint8_t is_iolink_bi_directional; +} amdsmi_p2p_capability_t; + //! Guaranteed maximum possible number of supported frequencies #define AMDSMI_MAX_NUM_FREQUENCIES 33 @@ -4283,6 +4294,36 @@ amdsmi_is_P2P_accessible(amdsmi_processor_handle processor_handle_src, amdsmi_processor_handle processor_handle_dst, bool *accessible); + +/** + * @brief Retrieve connection type and P2P capabilities between 2 GPUs + * + * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf} + * + * @details Given a source processor handle @p processor_handle_src and + * a destination processor handle @p processor_handle_dst, a pointer to an amdsmi_io_link_type_t @p type, + * and a pointer to amdsmi_p2p_capability_t @p cap. This function will write the connection type, + * and io link capabilities between the device + * @p processor_handle_src and @p processor_handle_dst to the memory + * pointed to by @p cap and @p type. + * + * @param[in] processor_handle_src the source processor handle + * + * @param[in] processor_handle_dst the destination processor handle + * + * @param[in,out] type A pointer to an ::amdsmi_io_link_type_t to which the + * type for the connection should be written. 
+ * + * @param[in,out] cap A pointer to an ::amdsmi_p2p_capability_t to which the + * io link capabilities should be written. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t +amdsmi_topo_get_p2p_status(amdsmi_processor_handle processor_handle_src, + amdsmi_processor_handle processor_handle_dst, + amdsmi_io_link_type_t *type, amdsmi_p2p_capability_t *cap); + /** @} End HWTopo */ /*****************************************************************************/ diff --git a/projects/amdsmi/py-interface/__init__.py b/projects/amdsmi/py-interface/__init__.py index 5e208aadcc..9dc45527b6 100644 --- a/projects/amdsmi/py-interface/__init__.py +++ b/projects/amdsmi/py-interface/__init__.py @@ -211,6 +211,7 @@ from .amdsmi_interface import amdsmi_topo_get_numa_node_number from .amdsmi_interface import amdsmi_topo_get_link_weight from .amdsmi_interface import amdsmi_get_minmax_bandwidth_between_processors from .amdsmi_interface import amdsmi_topo_get_link_type +from .amdsmi_interface import amdsmi_topo_get_p2p_status from .amdsmi_interface import amdsmi_is_P2P_accessible from .amdsmi_interface import amdsmi_get_xgmi_info diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index a7214776b4..c91319662a 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -2540,6 +2540,40 @@ def amdsmi_topo_get_link_type( return {"hops": hops.value, "type": type.value} +def amdsmi_topo_get_p2p_status( + processor_handle_src: amdsmi_wrapper.amdsmi_processor_handle, + processor_handle_dst: amdsmi_wrapper.amdsmi_processor_handle, +): + if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException( + processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle + ) + + if not isinstance(processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle): + raise 
AmdSmiParameterException( + processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle + ) + + type = ctypes.c_uint32() + cap = amdsmi_wrapper.struct_amdsmi_p2p_capability_t() + + _check_res( + amdsmi_wrapper.amdsmi_topo_get_p2p_status( + processor_handle_src, processor_handle_dst, ctypes.byref(type), ctypes.byref(cap) + ) + ) + + return { + 'type' : type.value, # plain int (not the ctypes object), matching amdsmi_topo_get_link_type + 'cap': { + 'is_iolink_coherent': cap.is_iolink_coherent, + 'is_iolink_atomics_32bit': cap.is_iolink_atomics_32bit, + 'is_iolink_atomics_64bit': cap.is_iolink_atomics_64bit, + 'is_iolink_dma': cap.is_iolink_dma, + 'is_iolink_bi_directional': cap.is_iolink_bi_directional + } + } + def amdsmi_is_P2P_accessible( processor_handle_src: amdsmi_wrapper.amdsmi_processor_handle, diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py index 805d8d88d8..8d8b0aa739 100644 --- a/projects/amdsmi/py-interface/amdsmi_wrapper.py +++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py @@ -1065,6 +1065,19 @@ struct_amdsmi_proc_info_t._fields_ = [ ] amdsmi_proc_info_t = struct_amdsmi_proc_info_t +class struct_amdsmi_p2p_capability_t(Structure): + pass + +struct_amdsmi_p2p_capability_t._pack_ = 1 # source:False +struct_amdsmi_p2p_capability_t._fields_ = [ + ('is_iolink_coherent', ctypes.c_ubyte), + ('is_iolink_atomics_32bit', ctypes.c_ubyte), + ('is_iolink_atomics_64bit', ctypes.c_ubyte), + ('is_iolink_dma', ctypes.c_ubyte), + ('is_iolink_bi_directional', ctypes.c_ubyte), +] + +amdsmi_p2p_capability_t = struct_amdsmi_p2p_capability_t # values for enumeration 'amdsmi_dev_perf_level_t' amdsmi_dev_perf_level_t__enumvalues = { @@ -2201,6 +2214,9 @@ amdsmi_topo_get_link_type.argtypes = [amdsmi_processor_handle, amdsmi_processor_ amdsmi_is_P2P_accessible = _libraries['libamd_smi.so'].amdsmi_is_P2P_accessible amdsmi_is_P2P_accessible.restype = amdsmi_status_t amdsmi_is_P2P_accessible.argtypes = [amdsmi_processor_handle, amdsmi_processor_handle, ctypes.POINTER(ctypes.c_bool)] 
+amdsmi_topo_get_p2p_status = _libraries['libamd_smi.so'].amdsmi_topo_get_p2p_status +amdsmi_topo_get_p2p_status.restype = amdsmi_status_t +amdsmi_topo_get_p2p_status.argtypes = [amdsmi_processor_handle, amdsmi_processor_handle, ctypes.POINTER(amdsmi_io_link_type_t), ctypes.POINTER(struct_amdsmi_p2p_capability_t)] amdsmi_get_gpu_compute_partition = _libraries['libamd_smi.so'].amdsmi_get_gpu_compute_partition amdsmi_get_gpu_compute_partition.restype = amdsmi_status_t amdsmi_get_gpu_compute_partition.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_char), uint32_t] @@ -2726,9 +2742,9 @@ __all__ = \ 'amdsmi_memory_partition_type_t', 'amdsmi_memory_type_t', 'amdsmi_mm_ip_t', 'amdsmi_name_value_t', 'amdsmi_od_vddc_point_t', 'amdsmi_od_volt_curve_t', 'amdsmi_od_volt_freq_data_t', - 'amdsmi_pcie_bandwidth_t', 'amdsmi_pcie_info_t', - 'amdsmi_power_cap_info_t', 'amdsmi_power_info_t', - 'amdsmi_power_profile_preset_masks_t', + 'amdsmi_p2p_capability_t', 'amdsmi_pcie_bandwidth_t', + 'amdsmi_pcie_info_t', 'amdsmi_power_cap_info_t', + 'amdsmi_power_info_t', 'amdsmi_power_profile_preset_masks_t', 'amdsmi_power_profile_status_t', 'amdsmi_power_type_t', 'amdsmi_proc_info_t', 'amdsmi_process_handle_t', 'amdsmi_process_info_t', 'amdsmi_processor_handle', @@ -2761,7 +2777,7 @@ __all__ = \ 'amdsmi_temp_range_refresh_rate_t', 'amdsmi_temperature_metric_t', 'amdsmi_temperature_type_t', 'amdsmi_topo_get_link_type', 'amdsmi_topo_get_link_weight', 'amdsmi_topo_get_numa_node_number', - 'amdsmi_utilization_counter_t', + 'amdsmi_topo_get_p2p_status', 'amdsmi_utilization_counter_t', 'amdsmi_utilization_counter_type_t', 'amdsmi_vbios_info_t', 'amdsmi_version_t', 'amdsmi_voltage_metric_t', 'amdsmi_voltage_type_t', 'amdsmi_vram_info_t', @@ -2785,6 +2801,7 @@ __all__ = \ 'struct_amdsmi_name_value_t', 'struct_amdsmi_od_vddc_point_t', 'struct_amdsmi_od_volt_curve_t', 'struct_amdsmi_od_volt_freq_data_t', + 'struct_amdsmi_p2p_capability_t', 'struct_amdsmi_pcie_bandwidth_t', 
'struct_amdsmi_pcie_info_t', 'struct_amdsmi_power_cap_info_t', 'struct_amdsmi_power_info_t', 'struct_amdsmi_power_profile_status_t', diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h index a9dcaa18ce..47edd8cc09 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h @@ -866,6 +866,17 @@ typedef struct { typedef rsmi_frequencies_t rsmi_frequencies; /// \endcond +/** + * @brief IO Link P2P Capability + */ +typedef struct { + uint8_t is_iolink_coherent; // 1 = true, 0 = false, UINT8_MAX = Not defined. + uint8_t is_iolink_atomics_32bit; + uint8_t is_iolink_atomics_64bit; + uint8_t is_iolink_dma; + uint8_t is_iolink_bi_directional; +} rsmi_p2p_capability_t; + /** * @brief This structure holds information about the possible PCIe * bandwidths. Specifically, the possible transfer rates and their @@ -4326,6 +4337,37 @@ rsmi_status_t rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst, bool *accessible); +/** + * @brief Retrieve connection type and P2P capabilities between 2 GPUs + * + * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf} + * + * @details Given a source device index @p dv_ind_src and + * a destination device index @p dv_ind_dst, a pointer to an ::RSMI_IO_LINK_TYPE @p type, + * and a pointer to an ::rsmi_p2p_capability_t @p cap, this function will write the connection type + * and io link capabilities between the devices + * @p dv_ind_src and @p dv_ind_dst to the memory + * pointed to by @p type and @p cap. + * + * @param[in] dv_ind_src the source device index + * + * @param[in] dv_ind_dst the destination device index + * + * @param[in,out] type A pointer to an ::RSMI_IO_LINK_TYPE to which the + * type for the connection should be written. 
+ * + * @param[in,out] cap A pointer to an ::rsmi_p2p_capability_t to which the + * io link capabilities should be written. + * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function + */ +rsmi_status_t +rsmi_topo_get_p2p_status(uint32_t dv_ind_src, uint32_t dv_ind_dst, + RSMI_IO_LINK_TYPE *type, rsmi_p2p_capability_t *cap); + /** @} */ // end of HWTopo /*****************************************************************************/ diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_io_link.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_io_link.h index 191d5c96f2..e7bc35ebc2 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_io_link.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_io_link.h @@ -85,7 +85,8 @@ typedef enum _LINK_DIRECTORY_TYPE { class IOLink { public: explicit IOLink(uint32_t node_indx, uint32_t link_indx, LINK_DIRECTORY_TYPE link_dir_type) : - node_indx_(node_indx), link_indx_(link_indx), link_dir_type_(link_dir_type) {} + node_indx_(node_indx), link_indx_(link_indx), link_dir_type_(link_dir_type), + link_cap_{UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX} {} ~IOLink(); int Initialize(); @@ -96,23 +97,28 @@ class IOLink { IO_LINK_TYPE type(void) const {return type_;} uint32_t node_from(void) const {return node_from_;} uint32_t node_to(void) const {return node_to_;} + uint32_t flag(void) const {return flags_;} uint64_t weight(void) const {return weight_;} LINK_DIRECTORY_TYPE get_directory_type(void) const {return link_dir_type_;} uint64_t min_bandwidth(void) const {return min_bandwidth_;} uint64_t max_bandwidth(void) const {return max_bandwidth_;} + const rsmi_p2p_capability_t& get_link_capability(void) const {return link_cap_;} - + protected: + virtual int UpdateP2pCapability(void); private: uint32_t node_indx_; uint32_t 
link_indx_; IO_LINK_TYPE type_; uint32_t node_from_; uint32_t node_to_; + uint32_t flags_; uint64_t weight_; uint64_t min_bandwidth_; uint64_t max_bandwidth_; std::map properties_; LINK_DIRECTORY_TYPE link_dir_type_; + rsmi_p2p_capability_t link_cap_; }; int diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi.cc b/projects/amdsmi/rocm_smi/src/rocm_smi.cc index 3830f63986..7672e8b58c 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi.cc @@ -5285,6 +5285,81 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst, CATCH } +rsmi_status_t +rsmi_topo_get_p2p_status(uint32_t dv_ind_src, uint32_t dv_ind_dst, + RSMI_IO_LINK_TYPE *type, rsmi_p2p_capability_t *cap) { + TRY + + uint32_t dv_ind = dv_ind_src; + GET_DEV_AND_KFDNODE_FROM_INDX + DEVICE_MUTEX + + if (type == nullptr || cap == nullptr) { + return RSMI_STATUS_INVALID_ARGS; + } + + // If source device is same as destination, return invalid args + if (dv_ind_src == dv_ind_dst) { + return RSMI_STATUS_INVALID_ARGS; + } + + uint32_t node_ind_src, node_ind_dst; + // Fetch the source and destination node index + if (smi.get_node_index(dv_ind_src, &node_ind_src) || + smi.get_node_index(dv_ind_dst, &node_ind_dst)) { + return RSMI_STATUS_INVALID_ARGS; + } + + bool node_is_find = false; + std::map> io_link_map_tmp; + std::map>::iterator it; + // Iterate over P2P links + if (DiscoverP2PLinksPerNode(node_ind_src, &io_link_map_tmp) == 0) { + for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++) { + if (it->first == node_ind_dst) { + node_is_find = true; + break; + } + } + io_link_map_tmp.clear(); + } else { + return RSMI_STATUS_FILE_ERROR; + } + + if (!node_is_find) { + // Iterate over IO links + if (DiscoverIOLinksPerNode(node_ind_src, &io_link_map_tmp) == 0) { + for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++) { + if (it->first == node_ind_dst) { + node_is_find = true; + break; + } + } + io_link_map_tmp.clear(); + } else { + return 
RSMI_STATUS_FILE_ERROR; + } + } + + if (node_is_find) { + amd::smi::IO_LINK_TYPE io_link_type = it->second->type(); + if (io_link_type == amd::smi::IOLINK_TYPE_PCIEXPRESS) { + *type = RSMI_IOLINK_TYPE_PCIEXPRESS; + } else if (io_link_type == amd::smi::IOLINK_TYPE_XGMI) { + *type = RSMI_IOLINK_TYPE_XGMI; + } else { + // Unexpected IO Link type read + return RSMI_STATUS_NOT_SUPPORTED; + } + *cap = it->second->get_link_capability(); + return RSMI_STATUS_SUCCESS; + } + + return RSMI_STATUS_NOT_SUPPORTED; + + CATCH +} + static rsmi_status_t get_compute_partition(uint32_t dv_ind, std::string &compute_partition) { TRY diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_io_link.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_io_link.cc index da3795fafa..93b4ae238a 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_io_link.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_io_link.cc @@ -57,6 +57,15 @@ #include "rocm_smi/rocm_smi_utils.h" #include "rocm_smi/rocm_smi_io_link.h" + +#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31) +#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0x7fffffe0 + namespace amd { namespace smi { @@ -76,7 +85,7 @@ static const char *kIOLinkPropMIN_BANDWIDTHStr = "min_bandwidth"; static const char *kIOLinkPropMAX_BANDWIDTHStr = "max_bandwidth"; // static const char *kIOLinkPropRECOMMENDED_TRANSFER_SIZEStr = // "recommended_transfer_size"; -// static const char *kIOLinkPropFLAGSStr = "flags"; +static const char *kIOLinkPropFLAGSStr = "flags"; static bool is_number(const std::string &s) { return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit); @@ -380,6 +389,12 @@ IOLink::Initialize(void) { ret = get_property_value(kIOLinkPropWEIGHTStr, &weight_); if (ret) {return ret;} + ret = 
get_property_value(kIOLinkPropFLAGSStr, reinterpret_cast(&flags_)); + if (ret) {return ret;} + + ret = UpdateP2pCapability(); + if (ret) {return ret;} + ret = get_property_value(kIOLinkPropMIN_BANDWIDTHStr, &min_bandwidth_); if (ret) {return ret;} @@ -401,5 +416,31 @@ IOLink::get_property_value(std::string property, uint64_t *value) { return 0; } +int IOLink::UpdateP2pCapability(void) { + const uint8_t cap_true = 1; + const uint8_t cap_false = 0; + + if (!(flags_ & CRAT_IOLINK_FLAGS_ENABLED)) { + return 0; + } + + link_cap_.is_iolink_coherent = + (flags_ & CRAT_IOLINK_FLAGS_NON_COHERENT) ? cap_false : cap_true; + + link_cap_.is_iolink_atomics_32bit = + (flags_ & CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT) ? cap_false : cap_true; + + link_cap_.is_iolink_atomics_64bit = + (flags_ & CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT) ? cap_false : cap_true; + + link_cap_.is_iolink_bi_directional = + (flags_ & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL) ? cap_true : cap_false; + + link_cap_.is_iolink_dma = + (flags_ & CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA) ? 
cap_false : cap_true; + + return 0; +} + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index c2650c8dcf..5fe6398f26 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -1053,6 +1053,26 @@ amdsmi_is_P2P_accessible(amdsmi_processor_handle processor_handle_src, return amd::smi::rsmi_to_amdsmi_status(rstatus); } +amdsmi_status_t +amdsmi_topo_get_p2p_status(amdsmi_processor_handle processor_handle_src, + amdsmi_processor_handle processor_handle_dst, + amdsmi_io_link_type_t *type, amdsmi_p2p_capability_t *cap) { + AMDSMI_CHECK_INIT(); + + amd::smi::AMDSmiGPUDevice* src_device = nullptr; + amd::smi::AMDSmiGPUDevice* dst_device = nullptr; + amdsmi_status_t r = get_gpu_device_from_handle(processor_handle_src, &src_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + r = get_gpu_device_from_handle(processor_handle_dst, &dst_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + auto rstatus = rsmi_topo_get_p2p_status(src_device->get_gpu_id(), dst_device->get_gpu_id(), + reinterpret_cast(type), + reinterpret_cast(cap)); + return amd::smi::rsmi_to_amdsmi_status(rstatus); +} + // Compute Partition functions amdsmi_status_t amdsmi_get_gpu_compute_partition(amdsmi_processor_handle processor_handle, diff --git a/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc index 7f1e095758..b69385a12a 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc @@ -60,6 +60,7 @@ typedef struct { uint64_t hops; uint64_t weight; bool accessible; + amdsmi_p2p_capability_t cap; } gpu_link_t; TestHWTopologyRead::TestHWTopologyRead() : TestBase() { @@ -136,9 +137,11 @@ void TestHWTopologyRead::Run(void) { gpu_links[dv_ind_src][dv_ind_dst].hops = 0; gpu_links[dv_ind_src][dv_ind_dst].weight = 
0; gpu_links[dv_ind_src][dv_ind_dst].accessible = true; + gpu_links[dv_ind_src][dv_ind_dst].cap = + {UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX}; } else { amdsmi_io_link_type_t type; - err = amdsmi_topo_get_link_type(processor_handles_[dv_ind_src], + err = amdsmi_topo_get_link_type(processor_handles_[dv_ind_src], processor_handles_[dv_ind_dst], &gpu_links[dv_ind_src][dv_ind_dst].hops, &type); if (err != AMDSMI_STATUS_SUCCESS) { @@ -170,6 +173,34 @@ void TestHWTopologyRead::Run(void) { } } } + err = amdsmi_topo_get_p2p_status(processor_handles_[dv_ind_src], + processor_handles_[dv_ind_dst], + &type, &gpu_links[dv_ind_src][dv_ind_dst].cap); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << + "\t**Link Type. read: Not supported on this machine" + << std::endl; + return; + } + } else { + CHK_ERR_ASRT(err) + } + } else { + switch (type) { + case AMDSMI_IOLINK_TYPE_PCIEXPRESS: + case AMDSMI_IOLINK_TYPE_XGMI: + // Do nothing, the type is printed by the previous test for amdsmi_topo_get_link_type + break; + default: + gpu_links[dv_ind_src][dv_ind_dst].type = "XXXX"; + IF_VERB(STANDARD) { + std::cout << "\t**Invalid IO LINK type. 
type=" << type << + std::endl; + } + } + } err = amdsmi_topo_get_link_weight(processor_handles_[dv_ind_src], processor_handles_[dv_ind_dst], &gpu_links[dv_ind_src][dv_ind_dst].weight); @@ -286,6 +317,7 @@ void TestHWTopologyRead::Run(void) { std::cout << std::endl; } std::cout << std::endl; + std::cout << "**Access between two GPUs**" << std::endl; std::cout << " "; for (i = 0; i < num_devices; ++i) { @@ -303,4 +335,125 @@ void TestHWTopologyRead::Run(void) { std::cout << std::endl; } std::cout << std::endl; + + std::cout << "**Cache coherency between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + continue; + } + + if (gpu_links[i][j].cap.is_iolink_coherent == UINT8_MAX) { + std::cout << std::setw(12) << std::left << "N/A"; + continue; + } + + std::cout << std::setw(12) << std::left + << (gpu_links[i][j].cap.is_iolink_coherent ? 
"C" : "NC"); + } + std::cout << std::endl; + } + std::cout << std::endl; + + std::cout << "**Atomics between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + continue; + } + + if (gpu_links[i][j].cap.is_iolink_atomics_64bit == UINT8_MAX || + gpu_links[i][j].cap.is_iolink_atomics_32bit == UINT8_MAX) { + std::cout << std::setw(12) << std::left << "N/A"; + continue; + } + + tmp = gpu_links[i][j].cap.is_iolink_atomics_64bit ? "64" : ""; + if (gpu_links[i][j].cap.is_iolink_atomics_32bit) { + if (!tmp.empty()) { + tmp += ","; + } + tmp += "32"; + } + std::cout << std::setw(12) << std::left << (tmp.empty() ? "N/A" : tmp); + } + std::cout << std::endl; + } + std::cout << std::endl; + + std::cout << "**DMA between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + continue; + } + + if (gpu_links[i][j].cap.is_iolink_dma == UINT8_MAX) { + std::cout << std::setw(12) << std::left << "N/A"; + continue; + } + + std::cout << std::boolalpha; + std::cout << std::setw(12) << std::left + << static_cast(gpu_links[i][j].cap.is_iolink_dma); + } + std::cout << std::endl; + } + std::cout << std::endl; + + std::cout << "**BI-Directional between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + 
std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + continue; + } + + if (gpu_links[i][j].cap.is_iolink_dma == UINT8_MAX) { + std::cout << std::setw(12) << std::left << "N/A"; + continue; + } + + std::cout << std::boolalpha; + std::cout << std::setw(12) << std::left + << static_cast(gpu_links[i][j].cap.is_iolink_bi_directional); + } + std::cout << std::endl; + } + std::cout << std::endl; }