[SWDEV-463402] - Support retrieving connection type and P2P capabilities between two GPUs

1. Add an API interface, amdsmi_topo_get_p2p_status, to retrieve the connection type and P2P capabilities between 2 GPUs. 2. Add a P2P status test to hw_topology_read that prints P2P capability information. 3. Add the following tables to the CLI topology subcommand: - CACHE COHERANCY TABLE - ATOMICS TABLE - DMA TABLE - BI-DIRECTIONAL TABLE Change-Id: I199173030d4170115cea27c472958a4826e4e1bf Signed-off-by: Tim Huang <tim.huang@amd.com> [ROCm/amdsmi commit: 260edaa752]
2024-08-21 11:26:36 +08:00
@@ -19,6 +19,163 @@ Added `amdsmi_get_gpu_mem_overdrive_level()` function to amd-smi C and Python Li
 - **Added Subsystem Device ID to `amd-smi static --asic`**.
 No underlying changes to amdsmi_get_gpu_asic_info

+- **Added retrieval of the connection type and P2P capabilities between two GPUs**.
+  - Added the `amdsmi_topo_get_p2p_status` function to the amd-smi C and Python Libraries.
+  - Added P2P link capability tables to the CLI `amd-smi topology` command.
+
+```shell
+$ amd-smi topology -h
+usage: amd-smi topology [-h] [--json | --csv] [--file FILE] [--loglevel LEVEL]
+                        [-g GPU [GPU ...]] [-a] [-w] [-o] [-t] [-b] [-c] [-n] [-d] [-z]
+
+If no GPU is specified, returns information for all GPUs on the system.
+If no topology argument is provided all topology information will be displayed.
+
+Topology arguments:
+  -h, --help               show this help message and exit
+  -g, --gpu GPU [GPU ...]  Select a GPU ID, BDF, or UUID from the possible choices:
+                           ID: 0 | BDF: 0000:0c:00.0 | UUID: 5fff74a1-0000-1000-808c-324a4d24b37e
+                           ID: 1 | BDF: 0000:22:00.0 | UUID: 06ff74a1-0000-1000-80d3-f5e97636ae62
+                           ID: 2 | BDF: 0000:38:00.0 | UUID: 87ff74a1-0000-1000-80a0-d0a45576c5ed
+                           ID: 3 | BDF: 0000:5c:00.0 | UUID: 5dff74a1-0000-1000-8054-a29c595fd7f3
+                           ID: 4 | BDF: 0000:9f:00.0 | UUID: a8ff74a1-0000-1000-805b-92615ca9e7b4
+                           ID: 5 | BDF: 0000:af:00.0 | UUID: ddff74a1-0000-1000-809e-5a98a60013bd
+                           ID: 6 | BDF: 0000:bf:00.0 | UUID: 9aff74a1-0000-1000-80e8-cbefaf9f72c3
+                           ID: 7 | BDF: 0000:df:00.0 | UUID: 48ff74a1-0000-1000-806e-3c0b30d78e00
+                             all | Selects all devices
+
+
+  -a, --access             Displays link accessibility between GPUs
+  -w, --weight             Displays relative weight between GPUs
+  -o, --hops               Displays the number of hops between GPUs
+  -t, --link-type          Displays the link type between GPUs
+  -b, --numa-bw            Display max and min bandwidth between nodes
+  -c, --coherent           Display cache coherant (or non-coherant) link capability between nodes
+  -n, --atomics            Display 32 and 64-bit atomic io link capability between nodes
+  -d, --dma                Display P2P direct memory access (DMA) link capability between nodes
+  -z, --bi-dir             Display P2P bi-directional link capability between nodes
+
+
+Command Modifiers:
+  --json                   Displays output in JSON format (human readable by default).
+  --csv                    Displays output in CSV format (human readable by default).
+  --file FILE              Saves output into a file on the provided path (stdout by default).
+  --loglevel LEVEL         Set the logging level from the possible choices:
+                                DEBUG, INFO, WARNING, ERROR, CRITICAL
+```
+
+```shell
+$ amd-smi topology
+ACCESS TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:22:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:38:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:5c:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:9f:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:af:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:bf:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+0000:df:00.0 ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED      ENABLED
+
+WEIGHT TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 0            15           15           15           15           15           15           15
+0000:22:00.0 15           0            15           15           15           15           15           15
+0000:38:00.0 15           15           0            15           15           15           15           15
+0000:5c:00.0 15           15           15           0            15           15           15           15
+0000:9f:00.0 15           15           15           15           0            15           15           15
+0000:af:00.0 15           15           15           15           15           0            15           15
+0000:bf:00.0 15           15           15           15           15           15           0            15
+0000:df:00.0 15           15           15           15           15           15           15           0
+
+HOPS TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 0            1            1            1            1            1            1            1
+0000:22:00.0 1            0            1            1            1            1            1            1
+0000:38:00.0 1            1            0            1            1            1            1            1
+0000:5c:00.0 1            1            1            0            1            1            1            1
+0000:9f:00.0 1            1            1            1            0            1            1            1
+0000:af:00.0 1            1            1            1            1            0            1            1
+0000:bf:00.0 1            1            1            1            1            1            0            1
+0000:df:00.0 1            1            1            1            1            1            1            0
+
+LINK TYPE TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 SELF         XGMI         XGMI         XGMI         XGMI         XGMI         XGMI         XGMI
+0000:22:00.0 XGMI         SELF         XGMI         XGMI         XGMI         XGMI         XGMI         XGMI
+0000:38:00.0 XGMI         XGMI         SELF         XGMI         XGMI         XGMI         XGMI         XGMI
+0000:5c:00.0 XGMI         XGMI         XGMI         SELF         XGMI         XGMI         XGMI         XGMI
+0000:9f:00.0 XGMI         XGMI         XGMI         XGMI         SELF         XGMI         XGMI         XGMI
+0000:af:00.0 XGMI         XGMI         XGMI         XGMI         XGMI         SELF         XGMI         XGMI
+0000:bf:00.0 XGMI         XGMI         XGMI         XGMI         XGMI         XGMI         SELF         XGMI
+0000:df:00.0 XGMI         XGMI         XGMI         XGMI         XGMI         XGMI         XGMI         SELF
+
+NUMA BW TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 N/A          50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  50000-50000
+0000:22:00.0 50000-50000  N/A          50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  50000-50000
+0000:38:00.0 50000-50000  50000-50000  N/A          50000-50000  50000-50000  50000-50000  50000-50000  50000-50000
+0000:5c:00.0 50000-50000  50000-50000  50000-50000  N/A          50000-50000  50000-50000  50000-50000  50000-50000
+0000:9f:00.0 50000-50000  50000-50000  50000-50000  50000-50000  N/A          50000-50000  50000-50000  50000-50000
+0000:af:00.0 50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  N/A          50000-50000  50000-50000
+0000:bf:00.0 50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  N/A          50000-50000
+0000:df:00.0 50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  50000-50000  N/A
+
+CACHE COHERANCY TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 SELF         C            NC           NC           C            C            C            NC
+0000:22:00.0 C            SELF         NC           C            C            C            NC           C
+0000:38:00.0 NC           NC           SELF         C            C            NC           C            NC
+0000:5c:00.0 NC           C            C            SELF         NC           C            NC           NC
+0000:9f:00.0 C            C            C            NC           SELF         NC           NC           C
+0000:af:00.0 C            C            NC           C            NC           SELF         C            C
+0000:bf:00.0 C            NC           C            NC           NC           C            SELF         NC
+0000:df:00.0 NC           C            NC           NC           C            C            NC           SELF
+
+ATOMICS TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 SELF         64,32        64,32        64           32           32           N/A          64,32
+0000:22:00.0 64,32        SELF         64           32           32           N/A          64,32        64,32
+0000:38:00.0 64,32        64           SELF         32           N/A          64,32        64,32        64,32
+0000:5c:00.0 64           32           32           SELF         64,32        64,32        64,32        32
+0000:9f:00.0 32           32           N/A          64,32        SELF         64,32        32           32
+0000:af:00.0 32           N/A          64,32        64,32        64,32        SELF         32           N/A
+0000:bf:00.0 N/A          64,32        64,32        64,32        32           32           SELF         64,32
+0000:df:00.0 64,32        64,32        64,32        32           32           N/A          64,32        SELF
+
+DMA TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 SELF         T            T            F            F            T            F            T
+0000:22:00.0 T            SELF         F            F            T            F            T            T
+0000:38:00.0 T            F            SELF         T            F            T            T            T
+0000:5c:00.0 F            F            T            SELF         T            T            T            F
+0000:9f:00.0 F            T            F            T            SELF         T            F            F
+0000:af:00.0 T            F            T            T            T            SELF         F            T
+0000:bf:00.0 F            T            T            T            F            F            SELF         F
+0000:df:00.0 T            T            T            F            F            T            F            SELF
+
+BI-DIRECTIONAL TABLE:
+             0000:0c:00.0 0000:22:00.0 0000:38:00.0 0000:5c:00.0 0000:9f:00.0 0000:af:00.0 0000:bf:00.0 0000:df:00.0
+0000:0c:00.0 SELF         T            T            F            F            T            F            T
+0000:22:00.0 T            SELF         F            F            T            F            T            T
+0000:38:00.0 T            F            SELF         T            F            T            T            T
+0000:5c:00.0 F            F            T            SELF         T            T            T            F
+0000:9f:00.0 F            T            F            T            SELF         T            F            F
+0000:af:00.0 T            F            T            T            T            SELF         F            T
+0000:bf:00.0 F            T            T            T            F            F            SELF         F
+0000:df:00.0 T            T            T            F            F            T            F            SELF
+
+
+Legend:
+ SELF = Current GPU
+ ENABLED / DISABLED = Link is enabled or disabled
+ N/A = Not supported
+ T/F = True / False
+ C/NC = Coherant / Non-Coherant io links
+ 64,32 = 64 bit and 32 bit atomic support
+ <BW from>-<BW to>
+```
+
 ### Removals

 - N/A
@@ -2822,7 +2822,8 @@ class AMDSMICommands():


    def topology(self, args, multiple_devices=False, gpu=None, access=None,
-                weight=None, hops=None, link_type=None, numa_bw=None):
+                weight=None, hops=None, link_type=None, numa_bw=None,
+                coherent=None, atomics=None, dma=None, bi_dir=None):
        """ Get topology information for target gpus
            params:
                args - argparser args to pass to subcommand
@@ -2833,6 +2834,10 @@ class AMDSMICommands():
                hops (bool) - Value override for args.hops
                type (bool) - Value override for args.type
                numa_bw (bool) - Value override for args.numa_bw
+                coherent (bool) - Value override for args.coherent
+                atomics (bool) - Value override for args.atomics
+                dma (bool) - Value override for args.dma
+                bi_dir (bool) - Value override for args.bi_dir
            return:
                Nothing
        """
@@ -2849,6 +2854,14 @@ class AMDSMICommands():
            args.link_type = link_type
        if numa_bw:
            args.numa_bw = numa_bw
+        if coherent:
+            args.coherent = coherent
+        if atomics:
+            args.atomics = atomics
+        if dma:
+            args.dma = dma
+        if bi_dir:
+            args.bi_dir = bi_dir

        # Handle No GPU passed
        if args.gpu == None:
@@ -2858,8 +2871,10 @@ class AMDSMICommands():
            args.gpu = [args.gpu]

        # Handle all args being false
-        if not any([args.access, args.weight, args.hops, args.link_type, args.numa_bw]):
-            args.access = args.weight = args.hops = args.link_type= args.numa_bw = True
+        if not any([args.access, args.weight, args.hops, args.link_type, args.numa_bw,
+                    args.coherent, args.atomics, args.dma, args.bi_dir]):
+            args.access = args.weight = args.hops = args.link_type= args.numa_bw = \
+            args.coherent = args.atomics = args.dma = args.bi_dir = True

        # Clear the table header
        self.logger.table_header = ''.rjust(12)
@@ -2890,6 +2905,10 @@ class AMDSMICommands():
            #         "num_hops": num_hops - # of hops between devices
            #         "bandwidth": numa_bw - The NUMA "minimum bandwidth-maximum bandwidth" beween src and dest nodes
            #                      "N/A" - self node or not connected devices
+            #         "coherent": coherent - Coherant / Non-Coherant io links
+            #         "atomics": atomics - 32 and 64-bit atomic io link capability between nodes
+            #         "dma": dma - P2P direct memory access (DMA) link capability between nodes
+            #         "bi_dir": bi_dir - P2P bi-directional link capability between nodes
            #     }

            for dest_gpu_index, dest_gpu in enumerate(args.gpu):
@@ -2928,6 +2947,42 @@ class AMDSMICommands():
                else:
                    link_status = "DISABLED"

+                link_coherent = "SELF"
+                link_atomics = "SELF"
+                link_dma = "SELF"
+                link_bi_dir = "SELF"
+
+                if src_gpu != dest_gpu:
+                    try:
+                        cap = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']
+                        link_coherent = (
+                            "C" if cap['is_iolink_coherent'] == 1 else
+                            "NC" if cap['is_iolink_coherent'] == 0 else
+                            "N/A"
+                        )
+                        link_atomics = (
+                            "64,32" if cap['is_iolink_atomics_32bit'] == 1 and cap['is_iolink_atomics_64bit'] == 1 else
+                            "32" if cap['is_iolink_atomics_32bit'] == 1 else
+                            "64" if cap['is_iolink_atomics_64bit'] == 1 else
+                            "N/A"
+                        )
+                        link_dma = (
+                            "T" if cap['is_iolink_dma'] == 1 else
+                            "F" if cap['is_iolink_dma'] == 0 else
+                            "N/A"
+                        )
+                        link_bi_dir = (
+                            "T" if cap['is_iolink_bi_directional'] == 1 else
+                            "F" if cap['is_iolink_bi_directional'] == 0 else
+                            "N/A"
+                        )
+                    except amdsmi_exception.AmdSmiLibraryException as e:
+                        logging.debug("Failed to get link status for %s to %s | %s",
+                                    self.helpers.get_gpu_id_from_device_handle(src_gpu),
+                                    self.helpers.get_gpu_id_from_device_handle(dest_gpu),
+                                    e.get_error_info())
+
+
                # link_status = amdsmi_is_P2P_accessible(src,dest)
                dest_gpu_links = {
                    "gpu": self.helpers.get_gpu_id_from_device_handle(dest_gpu),
@@ -2937,6 +2992,10 @@ class AMDSMICommands():
                    "link_type": link_type,
                    "num_hops": num_hops,
                    "bandwidth": numa_bw,
+                    "coherent": link_coherent,
+                    "atomics": link_atomics,
+                    "dma": link_dma,
+                    "bi_dir": link_bi_dir
                }
                if not args.access:
                    del dest_gpu_links['link_status']
@@ -2948,6 +3007,14 @@ class AMDSMICommands():
                    del dest_gpu_links['num_hops']
                if not args.numa_bw:
                    del dest_gpu_links['bandwidth']
+                if not args.coherent:
+                    del dest_gpu_links['coherent']
+                if not args.atomics:
+                    del dest_gpu_links['atomics']
+                if not args.dma:
+                    del dest_gpu_links['dma']
+                if not args.bi_dir:
+                    del dest_gpu_links['bi_dir']
                links.append(dest_gpu_links)
                dest_end = dest_gpu_index+1 == len(args.gpu)
                isEndOfSrc = src_gpu_index+1 == len(args.gpu)
@@ -3165,6 +3232,175 @@ class AMDSMICommands():
                self.logger.table_title = "NUMA BW TABLE"
                self.logger.print_output(multiple_device_enabled=True, tabular=True)

+        if args.coherent:
+            tabular_output = []
+            for src_gpu_index, src_gpu in enumerate(args.gpu):
+                src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu)
+                if self.logger.is_human_readable_format():
+                    tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "}
+                else:
+                    tabular_output_dict = {'gpu' : src_gpu_bdf}
+                src_gpu_coherent = {}
+                for dest_gpu in args.gpu:
+                    dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu)
+                    dest_gpu_key = f'gpu_{dest_gpu_id}'
+
+                    if src_gpu == dest_gpu:
+                        src_gpu_coherent[dest_gpu_key] = "SELF"
+                        continue
+                    try:
+                        iolink_coherent = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']['is_iolink_coherent']
+                        src_gpu_coherent[dest_gpu_key] = "C" if iolink_coherent == 1 else "NC" if iolink_coherent == 0 else "N/A"
+                    except amdsmi_exception.AmdSmiLibraryException as e:
+                        src_gpu_coherent[dest_gpu_key] = "N/A"
+                        logging.debug("Failed to get link coherent for %s to %s | %s",
+                                        self.helpers.get_gpu_id_from_device_handle(src_gpu),
+                                        self.helpers.get_gpu_id_from_device_handle(dest_gpu),
+                                        e.get_error_info())
+
+                topo_values[src_gpu_index]['coherent'] = src_gpu_coherent
+
+                tabular_output_dict.update(src_gpu_coherent)
+                tabular_output.append(tabular_output_dict)
+
+            if self.logger.is_human_readable_format():
+                self.logger.multiple_device_output = tabular_output
+                self.logger.table_title = "CACHE COHERANCY TABLE"
+                self.logger.print_output(multiple_device_enabled=True, tabular=True)
+
+        if args.atomics:
+            tabular_output = []
+            for src_gpu_index, src_gpu in enumerate(args.gpu):
+                src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu)
+                if self.logger.is_human_readable_format():
+                    tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "}
+                else:
+                    tabular_output_dict = {'gpu' : src_gpu_bdf}
+                src_gpu_atomics = {}
+                for dest_gpu in args.gpu:
+                    dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu)
+                    dest_gpu_key = f'gpu_{dest_gpu_id}'
+
+                    if src_gpu == dest_gpu:
+                        src_gpu_atomics[dest_gpu_key] = "SELF"
+                        continue
+                    try:
+                        cap = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']
+                        src_gpu_atomics[dest_gpu_key] = (
+                            "64,32" if cap['is_iolink_atomics_32bit'] == 1 and cap['is_iolink_atomics_64bit'] == 1 else
+                            "32" if cap['is_iolink_atomics_32bit'] == 1 else
+                            "64" if cap['is_iolink_atomics_64bit'] == 1 else
+                            "N/A"
+                        )
+                    except amdsmi_exception.AmdSmiLibraryException as e:
+                        src_gpu_atomics[dest_gpu_key] = "N/A"
+                        logging.debug("Failed to get link atomics for %s to %s | %s",
+                                        self.helpers.get_gpu_id_from_device_handle(src_gpu),
+                                        self.helpers.get_gpu_id_from_device_handle(dest_gpu),
+                                        e.get_error_info())
+
+                topo_values[src_gpu_index]['atomics'] = src_gpu_atomics
+
+                tabular_output_dict.update(src_gpu_atomics)
+                tabular_output.append(tabular_output_dict)
+
+            if self.logger.is_human_readable_format():
+                self.logger.multiple_device_output = tabular_output
+                self.logger.table_title = "ATOMICS TABLE"
+                self.logger.print_output(multiple_device_enabled=True, tabular=True)
+
+        if args.dma:
+            tabular_output = []
+            for src_gpu_index, src_gpu in enumerate(args.gpu):
+                src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu)
+                if self.logger.is_human_readable_format():
+                    tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "}
+                else:
+                    tabular_output_dict = {'gpu' : src_gpu_bdf}
+                src_gpu_dma = {}
+                for dest_gpu in args.gpu:
+                    dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu)
+                    dest_gpu_key = f'gpu_{dest_gpu_id}'
+
+                    if src_gpu == dest_gpu:
+                        src_gpu_dma[dest_gpu_key] = "SELF"
+                        continue
+                    try:
+                        iolink_dma = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']['is_iolink_dma']
+                        src_gpu_dma[dest_gpu_key] = "T" if iolink_dma == 1 else "F" if iolink_dma == 0 else "N/A"
+                    except amdsmi_exception.AmdSmiLibraryException as e:
+                        src_gpu_dma[dest_gpu_key] = "N/A"
+                        logging.debug("Failed to get link dma for %s to %s | %s",
+                                        self.helpers.get_gpu_id_from_device_handle(src_gpu),
+                                        self.helpers.get_gpu_id_from_device_handle(dest_gpu),
+                                        e.get_error_info())
+
+                topo_values[src_gpu_index]['dma'] = src_gpu_dma
+
+                tabular_output_dict.update(src_gpu_dma)
+                tabular_output.append(tabular_output_dict)
+
+            if self.logger.is_human_readable_format():
+                self.logger.multiple_device_output = tabular_output
+                self.logger.table_title = "DMA TABLE"
+                self.logger.print_output(multiple_device_enabled=True, tabular=True)
+
+        if args.bi_dir:
+            tabular_output = []
+            for src_gpu_index, src_gpu in enumerate(args.gpu):
+                src_gpu_bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(src_gpu)
+                if self.logger.is_human_readable_format():
+                    tabular_output_dict = {'gpu' : f"{src_gpu_bdf} "}
+                else:
+                    tabular_output_dict = {'gpu' : src_gpu_bdf}
+                src_gpu_bi_dir = {}
+                for dest_gpu in args.gpu:
+                    dest_gpu_id = self.helpers.get_gpu_id_from_device_handle(dest_gpu)
+                    dest_gpu_key = f'gpu_{dest_gpu_id}'
+
+                    if src_gpu == dest_gpu:
+                        src_gpu_bi_dir[dest_gpu_key] = "SELF"
+                        continue
+                    try:
+                        iolink_bi_dir = amdsmi_interface.amdsmi_topo_get_p2p_status(src_gpu, dest_gpu)['cap']['is_iolink_bi_directional']
+                        src_gpu_bi_dir[dest_gpu_key] = "T" if iolink_bi_dir == 1 else "F" if iolink_bi_dir == 0 else "N/A"
+                    except amdsmi_exception.AmdSmiLibraryException as e:
+                        src_gpu_bi_dir[dest_gpu_key] = "N/A"
+                        logging.debug("Failed to get link bi-directional for %s to %s | %s",
+                                        self.helpers.get_gpu_id_from_device_handle(src_gpu),
+                                        self.helpers.get_gpu_id_from_device_handle(dest_gpu),
+                                        e.get_error_info())
+
+                topo_values[src_gpu_index]['bi_dir'] = src_gpu_bi_dir
+
+                tabular_output_dict.update(src_gpu_bi_dir)
+                tabular_output.append(tabular_output_dict)
+
+            if self.logger.is_human_readable_format():
+                self.logger.multiple_device_output = tabular_output
+                self.logger.table_title = "BI-DIRECTIONAL TABLE"
+                self.logger.print_output(multiple_device_enabled=True, tabular=True)
+
+        if self.logger.is_human_readable_format():
+            # Populate the legend output
+            legend_parts = [
+                "\n\nLegend:",
+                "  SELF = Current GPU",
+                "  ENABLED / DISABLED = Link is enabled or disabled",
+                "  N/A = Not supported",
+                "  T/F = True / False",
+                "  C/NC = Coherant / Non-Coherant io links",
+                "  64,32 = 64 bit and 32 bit atomic support",
+                "  <BW from>-<BW to>"
+            ]
+            legend_output = "\n".join(legend_parts)
+
+            if self.logger.destination == 'stdout':
+                print(legend_output)
+            else:
+                with self.logger.destination.open('a', encoding="utf-8") as output_file:
+                    output_file.write(legend_output + '\n')
+
        self.logger.multiple_device_output = topo_values

        if self.logger.is_csv_format():
@@ -972,6 +972,10 @@ class AMDSMIParser(argparse.ArgumentParser):
        hops_help = "Displays the number of hops between GPUs"
        link_type_help = "Displays the link type between GPUs"
        numa_bw_help = "Display max and min bandwidth between nodes"
+        coherent_help = "Display cache coherant (or non-coherant) link capability between nodes"
+        atomics_help = "Display 32 and 64-bit atomic io link capability between nodes"
+        dma_help = "Display P2P direct memory access (DMA) link capability between nodes"
+        bi_dir_help = "Display P2P bi-directional link capability between nodes"

        # Create topology subparser
        topology_parser = subparsers.add_parser('topology', help=topology_help, description=topology_subcommand_help)
@@ -989,6 +993,10 @@ class AMDSMIParser(argparse.ArgumentParser):
        topology_parser.add_argument('-o', '--hops', action='store_true', required=False, help=hops_help)
        topology_parser.add_argument('-t', '--link-type', action='store_true', required=False, help=link_type_help)
        topology_parser.add_argument('-b', '--numa-bw', action='store_true', required=False, help=numa_bw_help)
+        topology_parser.add_argument('-c', '--coherent', action='store_true', required=False, help=coherent_help)
+        topology_parser.add_argument('-n', '--atomics', action='store_true', required=False, help=atomics_help)
+        topology_parser.add_argument('-d', '--dma', action='store_true', required=False, help=dma_help)
+        topology_parser.add_argument('-z', '--bi-dir', action='store_true', required=False, help=bi_dir_help)


    def _add_set_value_parser(self, subparsers, func):
@@ -690,6 +690,17 @@ typedef struct {
  uint32_t reserved[4];
 } amdsmi_proc_info_t;

+/**
+ * @brief IO Link P2P Capability flags filled in by amdsmi_topo_get_p2p_status()
+ */
+typedef struct {
+  uint8_t is_iolink_coherent;    // 1 = true, 0 = false, UINT8_MAX = Not defined.
+  uint8_t is_iolink_atomics_32bit;    // 32-bit atomics flag. NOTE(review): presumably same 1/0/UINT8_MAX encoding - confirm
+  uint8_t is_iolink_atomics_64bit;    // 64-bit atomics flag. NOTE(review): presumably same 1/0/UINT8_MAX encoding - confirm
+  uint8_t is_iolink_dma;    // P2P DMA flag. NOTE(review): presumably same 1/0/UINT8_MAX encoding - confirm
+  uint8_t is_iolink_bi_directional;    // bi-directional link flag. NOTE(review): presumably same encoding - confirm
+} amdsmi_p2p_capability_t;
+

 //! Guaranteed maximum possible number of supported frequencies
 #define AMDSMI_MAX_NUM_FREQUENCIES 33
@@ -4283,6 +4294,36 @@ amdsmi_is_P2P_accessible(amdsmi_processor_handle processor_handle_src,
                          amdsmi_processor_handle processor_handle_dst,
                          bool *accessible);

+
+/**
+ *  @brief Retrieve connection type and P2P capabilities between 2 GPUs
+ *
+ *  @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf}  @platform{guest_mvf}
+ *
+ *  @details Given a source processor handle @p processor_handle_src, a
+ *  destination processor handle @p processor_handle_dst, a pointer to an
+ *  amdsmi_io_link_type_t @p type, and a pointer to an amdsmi_p2p_capability_t
+ *  @p cap, this function will write the connection type and the io link
+ *  capabilities between @p processor_handle_src and @p processor_handle_dst
+ *  to the memory pointed to by @p type and @p cap, respectively.
+ *
+ *  @param[in] processor_handle_src the source processor handle
+ *
+ *  @param[in] processor_handle_dst the destination processor handle
+ *
+ *  @param[in,out] type A pointer to an ::amdsmi_io_link_type_t to which the
+ *  type for the connection should be written.
+ *
+ *  @param[in,out] cap A pointer to an ::amdsmi_p2p_capability_t to which the
+ *  io link capabilities should be written.
+ *
+ *  @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
+ */
+amdsmi_status_t
+amdsmi_topo_get_p2p_status(amdsmi_processor_handle processor_handle_src,
+                           amdsmi_processor_handle processor_handle_dst,
+                           amdsmi_io_link_type_t *type, amdsmi_p2p_capability_t *cap);
+
 /** @} End HWTopo */

 /*****************************************************************************/
@@ -211,6 +211,7 @@ from .amdsmi_interface import amdsmi_topo_get_numa_node_number
 from .amdsmi_interface import amdsmi_topo_get_link_weight
 from .amdsmi_interface import amdsmi_get_minmax_bandwidth_between_processors
 from .amdsmi_interface import amdsmi_topo_get_link_type
+from .amdsmi_interface import amdsmi_topo_get_p2p_status
 from .amdsmi_interface import amdsmi_is_P2P_accessible
 from .amdsmi_interface import amdsmi_get_xgmi_info

@@ -2540,6 +2540,40 @@ def amdsmi_topo_get_link_type(

    return {"hops": hops.value, "type": type.value}

+def amdsmi_topo_get_p2p_status(
+    processor_handle_src: amdsmi_wrapper.amdsmi_processor_handle,
+    processor_handle_dst: amdsmi_wrapper.amdsmi_processor_handle,
+):
+    if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle):
+        raise AmdSmiParameterException(
+            processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle
+        )
+
+    if not isinstance(processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle):
+        raise AmdSmiParameterException(
+            processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle
+        )
+
+    type = ctypes.c_uint32()
+    cap = amdsmi_wrapper.struct_amdsmi_p2p_capability_t()
+
+    _check_res(
+        amdsmi_wrapper.amdsmi_topo_get_p2p_status(
+            processor_handle_src, processor_handle_dst, ctypes.byref(type), ctypes.byref(cap)
+        )
+    )
+
+    return {
+        'type' : type,
+        'cap': {
+            'is_iolink_coherent': cap.is_iolink_coherent,
+            'is_iolink_atomics_32bit': cap.is_iolink_atomics_32bit,
+            'is_iolink_atomics_64bit': cap.is_iolink_atomics_64bit,
+            'is_iolink_dma': cap.is_iolink_dma,
+            'is_iolink_bi_directional': cap.is_iolink_bi_directional
+        }
+    }
+

 def amdsmi_is_P2P_accessible(
    processor_handle_src: amdsmi_wrapper.amdsmi_processor_handle,
@@ -1065,6 +1065,19 @@ struct_amdsmi_proc_info_t._fields_ = [
 ]

 amdsmi_proc_info_t = struct_amdsmi_proc_info_t
+# ctypes mirror of the C amdsmi_p2p_capability_t struct: five one-byte fields.
+class struct_amdsmi_p2p_capability_t(Structure):
+    pass
+
+# _pack_ is assigned before _fields_ so the packing applies to the layout.
+struct_amdsmi_p2p_capability_t._pack_ = 1 # source:False
+struct_amdsmi_p2p_capability_t._fields_ = [
+    ('is_iolink_coherent', ctypes.c_ubyte),
+    ('is_iolink_atomics_32bit', ctypes.c_ubyte),
+    ('is_iolink_atomics_64bit', ctypes.c_ubyte),
+    ('is_iolink_dma', ctypes.c_ubyte),
+    ('is_iolink_bi_directional', ctypes.c_ubyte),
+]
+
+# Alias without the struct_ prefix.
+amdsmi_p2p_capability_t = struct_amdsmi_p2p_capability_t

 # values for enumeration 'amdsmi_dev_perf_level_t'
 amdsmi_dev_perf_level_t__enumvalues = {
@@ -2201,6 +2214,9 @@ amdsmi_topo_get_link_type.argtypes = [amdsmi_processor_handle, amdsmi_processor_
 amdsmi_is_P2P_accessible = _libraries['libamd_smi.so'].amdsmi_is_P2P_accessible
 amdsmi_is_P2P_accessible.restype = amdsmi_status_t
 amdsmi_is_P2P_accessible.argtypes = [amdsmi_processor_handle, amdsmi_processor_handle, ctypes.POINTER(ctypes.c_bool)]
+# Binding for amdsmi_topo_get_p2p_status(src handle, dst handle,
+# out: io link type, out: P2P capability struct) -> amdsmi_status_t.
+amdsmi_topo_get_p2p_status = _libraries['libamd_smi.so'].amdsmi_topo_get_p2p_status
+amdsmi_topo_get_p2p_status.restype = amdsmi_status_t
+amdsmi_topo_get_p2p_status.argtypes = [amdsmi_processor_handle, amdsmi_processor_handle, ctypes.POINTER(amdsmi_io_link_type_t), ctypes.POINTER(struct_amdsmi_p2p_capability_t)]
 amdsmi_get_gpu_compute_partition = _libraries['libamd_smi.so'].amdsmi_get_gpu_compute_partition
 amdsmi_get_gpu_compute_partition.restype = amdsmi_status_t
 amdsmi_get_gpu_compute_partition.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_char), uint32_t]
@@ -2726,9 +2742,9 @@ __all__ = \
    'amdsmi_memory_partition_type_t', 'amdsmi_memory_type_t',
    'amdsmi_mm_ip_t', 'amdsmi_name_value_t', 'amdsmi_od_vddc_point_t',
    'amdsmi_od_volt_curve_t', 'amdsmi_od_volt_freq_data_t',
-    'amdsmi_pcie_bandwidth_t', 'amdsmi_pcie_info_t',
-    'amdsmi_power_cap_info_t', 'amdsmi_power_info_t',
-    'amdsmi_power_profile_preset_masks_t',
+    'amdsmi_p2p_capability_t', 'amdsmi_pcie_bandwidth_t',
+    'amdsmi_pcie_info_t', 'amdsmi_power_cap_info_t',
+    'amdsmi_power_info_t', 'amdsmi_power_profile_preset_masks_t',
    'amdsmi_power_profile_status_t', 'amdsmi_power_type_t',
    'amdsmi_proc_info_t', 'amdsmi_process_handle_t',
    'amdsmi_process_info_t', 'amdsmi_processor_handle',
@@ -2761,7 +2777,7 @@ __all__ = \
    'amdsmi_temp_range_refresh_rate_t', 'amdsmi_temperature_metric_t',
    'amdsmi_temperature_type_t', 'amdsmi_topo_get_link_type',
    'amdsmi_topo_get_link_weight', 'amdsmi_topo_get_numa_node_number',
-    'amdsmi_utilization_counter_t',
+    'amdsmi_topo_get_p2p_status', 'amdsmi_utilization_counter_t',
    'amdsmi_utilization_counter_type_t', 'amdsmi_vbios_info_t',
    'amdsmi_version_t', 'amdsmi_voltage_metric_t',
    'amdsmi_voltage_type_t', 'amdsmi_vram_info_t',
@@ -2785,6 +2801,7 @@ __all__ = \
    'struct_amdsmi_name_value_t', 'struct_amdsmi_od_vddc_point_t',
    'struct_amdsmi_od_volt_curve_t',
    'struct_amdsmi_od_volt_freq_data_t',
+    'struct_amdsmi_p2p_capability_t',
    'struct_amdsmi_pcie_bandwidth_t', 'struct_amdsmi_pcie_info_t',
    'struct_amdsmi_power_cap_info_t', 'struct_amdsmi_power_info_t',
    'struct_amdsmi_power_profile_status_t',
@@ -866,6 +866,17 @@ typedef struct {
 typedef rsmi_frequencies_t rsmi_frequencies;
 /// \endcond

+/**
+ * @brief IO Link P2P Capability
+ *
+ * Every field uses the same encoding:
+ * 1 = true, 0 = false, UINT8_MAX = Not defined.
+ */
+typedef struct {
+  uint8_t is_iolink_coherent;        //!< Link is cache coherent
+  uint8_t is_iolink_atomics_32bit;   //!< Link supports 32-bit atomics
+  uint8_t is_iolink_atomics_64bit;   //!< Link supports 64-bit atomics
+  uint8_t is_iolink_dma;             //!< Link supports peer-to-peer DMA
+  uint8_t is_iolink_bi_directional;  //!< Link is bi-directional
+} rsmi_p2p_capability_t;
+
 /**
 * @brief This structure holds information about the possible PCIe
 * bandwidths. Specifically, the possible transfer rates and their
@@ -4326,6 +4337,37 @@ rsmi_status_t
 rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
                       bool *accessible);

+/**
+ *  @brief Retrieve connection type and P2P capabilities between 2 GPUs
+ *
+ *  @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf}  @platform{guest_mvf}
+ *
+ *  @details Given a source device index @p dv_ind_src, a destination device
+ *  index @p dv_ind_dst, a pointer to an ::RSMI_IO_LINK_TYPE @p type,
+ *  and a pointer to an ::rsmi_p2p_capability_t @p cap, this function will
+ *  write the connection type and the io link capabilities between the devices
+ *  @p dv_ind_src and @p dv_ind_dst to the memory
+ *  pointed to by @p type and @p cap.
+ *
+ *  @param[in] dv_ind_src the source device index
+ *
+ *  @param[in] dv_ind_dst the destination device index
+ *
+ *  @param[in,out] type A pointer to an ::RSMI_IO_LINK_TYPE to which the
+ *  type for the connection should be written.
+ *
+ *  @param[in,out] cap A pointer to an ::rsmi_p2p_capability_t to which the
+ *  io link capabilities should be written.
+ *
+ *  @retval ::RSMI_STATUS_SUCCESS call was successful
+ *  @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
+ *  (this includes a null @p type or @p cap, and @p dv_ind_src equal to
+ *  @p dv_ind_dst)
+ *  @retval ::RSMI_STATUS_FILE_ERROR the links of the source node could not
+ *  be discovered
+ *  @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
+ *  support this function
+ */
+rsmi_status_t
+rsmi_topo_get_p2p_status(uint32_t dv_ind_src, uint32_t dv_ind_dst,
+                         RSMI_IO_LINK_TYPE *type, rsmi_p2p_capability_t *cap);
+
 /** @} */  // end of HWTopo

 /*****************************************************************************/
@@ -85,7 +85,8 @@ typedef enum _LINK_DIRECTORY_TYPE {
 class IOLink {
 public:
    explicit IOLink(uint32_t node_indx, uint32_t link_indx, LINK_DIRECTORY_TYPE link_dir_type) :
-                    node_indx_(node_indx), link_indx_(link_indx), link_dir_type_(link_dir_type) {}
+                    node_indx_(node_indx), link_indx_(link_indx), link_dir_type_(link_dir_type),
+                    link_cap_{UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX} {}
    ~IOLink();

    int Initialize();
@@ -96,23 +97,28 @@ class IOLink {
    IO_LINK_TYPE type(void) const {return type_;}
    uint32_t node_from(void) const {return node_from_;}
    uint32_t node_to(void) const {return node_to_;}
+    uint32_t flag(void) const {return flags_;}
    uint64_t weight(void) const {return weight_;}
    LINK_DIRECTORY_TYPE get_directory_type(void) const {return link_dir_type_;}
    uint64_t min_bandwidth(void) const {return min_bandwidth_;}
    uint64_t max_bandwidth(void) const {return max_bandwidth_;}
+    const rsmi_p2p_capability_t& get_link_capability(void) const {return link_cap_;}

-
+ protected:
+    virtual int UpdateP2pCapability(void);
 private:
    uint32_t node_indx_;
    uint32_t link_indx_;
    IO_LINK_TYPE type_;
    uint32_t node_from_;
    uint32_t node_to_;
+    uint32_t flags_;
    uint64_t weight_;
    uint64_t min_bandwidth_;
    uint64_t max_bandwidth_;
    std::map<std::string, uint64_t> properties_;
    LINK_DIRECTORY_TYPE link_dir_type_;
+    rsmi_p2p_capability_t link_cap_;
 };

 int
@@ -5285,6 +5285,81 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
  CATCH
 }

+// Reports the link type and P2P capabilities of the link from dv_ind_src to
+// dv_ind_dst. The P2P links of the source node are searched first, then its
+// IO links.
+//
+// Returns:
+//   RSMI_STATUS_SUCCESS       - *type and *cap were written
+//   RSMI_STATUS_INVALID_ARGS  - null output pointer, identical source and
+//                               destination, or a node index lookup failed
+//   RSMI_STATUS_FILE_ERROR    - link discovery for the source node failed
+//   RSMI_STATUS_NOT_SUPPORTED - no link to the destination was found, or the
+//                               link type is neither PCIe nor XGMI
+rsmi_status_t
+rsmi_topo_get_p2p_status(uint32_t dv_ind_src, uint32_t dv_ind_dst,
+                         RSMI_IO_LINK_TYPE *type, rsmi_p2p_capability_t *cap) {
+  TRY
+
+  uint32_t dv_ind = dv_ind_src;
+  GET_DEV_AND_KFDNODE_FROM_INDX
+  DEVICE_MUTEX
+
+  if (type == nullptr || cap == nullptr) {
+    return RSMI_STATUS_INVALID_ARGS;
+  }
+
+  // If source device is same as destination, return invalid args
+  if (dv_ind_src == dv_ind_dst) {
+    return RSMI_STATUS_INVALID_ARGS;
+  }
+
+  uint32_t node_ind_src, node_ind_dst;
+  // Fetch the source and destination node index
+  if (smi.get_node_index(dv_ind_src, &node_ind_src) ||
+      smi.get_node_index(dv_ind_dst, &node_ind_dst)) {
+    return RSMI_STATUS_INVALID_ARGS;
+  }
+
+  bool node_is_find = false;
+  // Owning copy of the matching link. The map is cleared after each search,
+  // which invalidates its iterators, so the link must not be reached through
+  // an iterator afterwards.
+  std::shared_ptr<amd::smi::IOLink> link;
+  std::map<uint32_t, std::shared_ptr<amd::smi::IOLink>> io_link_map_tmp;
+
+  // Search the P2P links
+  if (DiscoverP2PLinksPerNode(node_ind_src, &io_link_map_tmp) != 0) {
+    return RSMI_STATUS_FILE_ERROR;
+  }
+  auto it = io_link_map_tmp.find(node_ind_dst);
+  if (it != io_link_map_tmp.end()) {
+    node_is_find = true;
+    link = it->second;
+  }
+  io_link_map_tmp.clear();
+
+  if (!node_is_find) {
+    // Search the IO links
+    if (DiscoverIOLinksPerNode(node_ind_src, &io_link_map_tmp) != 0) {
+      return RSMI_STATUS_FILE_ERROR;
+    }
+    it = io_link_map_tmp.find(node_ind_dst);
+    if (it != io_link_map_tmp.end()) {
+      node_is_find = true;
+      link = it->second;
+    }
+    io_link_map_tmp.clear();
+  }
+
+  if (!node_is_find || !link) {
+    return RSMI_STATUS_NOT_SUPPORTED;
+  }
+
+  const amd::smi::IO_LINK_TYPE io_link_type = link->type();
+  if (io_link_type == amd::smi::IOLINK_TYPE_PCIEXPRESS) {
+    *type = RSMI_IOLINK_TYPE_PCIEXPRESS;
+  } else if (io_link_type == amd::smi::IOLINK_TYPE_XGMI) {
+    *type = RSMI_IOLINK_TYPE_XGMI;
+  } else {
+    // Unexpected IO Link type read
+    return RSMI_STATUS_NOT_SUPPORTED;
+  }
+  *cap = link->get_link_capability();
+  return RSMI_STATUS_SUCCESS;
+
+  CATCH
+}
+
 static rsmi_status_t
 get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
  TRY
@@ -57,6 +57,15 @@
 #include "rocm_smi/rocm_smi_utils.h"
 #include "rocm_smi/rocm_smi_io_link.h"

+
+// Bit masks tested against the io link "flags" property.
+// Unsigned literals are used so that bit 31 is not produced by shifting into
+// the sign bit of a signed int.
+#define CRAT_IOLINK_FLAGS_ENABLED                 (1U << 0)
+#define CRAT_IOLINK_FLAGS_NON_COHERENT            (1U << 1)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT       (1U << 2)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT       (1U << 3)
+#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA     (1U << 4)
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL          (1U << 31)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK           0x7fffffe0U
+
 namespace amd {
 namespace smi {

@@ -76,7 +85,7 @@ static const char *kIOLinkPropMIN_BANDWIDTHStr = "min_bandwidth";
 static const char *kIOLinkPropMAX_BANDWIDTHStr = "max_bandwidth";
 // static const char *kIOLinkPropRECOMMENDED_TRANSFER_SIZEStr =
 // "recommended_transfer_size";
-// static const char *kIOLinkPropFLAGSStr = "flags";
+static const char *kIOLinkPropFLAGSStr = "flags";

 static bool is_number(const std::string &s) {
  return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit);
@@ -380,6 +389,12 @@ IOLink::Initialize(void) {
  ret = get_property_value(kIOLinkPropWEIGHTStr, &weight_);
  if (ret) {return ret;}

+  // get_property_value() stores a full uint64_t, but flags_ is only 32 bits
+  // wide. Read into a correctly sized temporary and narrow afterwards, so the
+  // store cannot spill past flags_ into the neighbouring members.
+  uint64_t flags_value = 0;
+  ret = get_property_value(kIOLinkPropFLAGSStr, &flags_value);
+  if (ret) {return ret;}
+  flags_ = static_cast<uint32_t>(flags_value);
+
+  ret = UpdateP2pCapability();
+  if (ret) {return ret;}
+
  ret = get_property_value(kIOLinkPropMIN_BANDWIDTHStr, &min_bandwidth_);
  if (ret) {return ret;}

@@ -401,5 +416,31 @@ IOLink::get_property_value(std::string property, uint64_t *value) {
  return 0;
 }

+// Derives link_cap_ from flags_. Always returns 0.
+int IOLink::UpdateP2pCapability(void) {
+    // Without the ENABLED bit, link_cap_ is left exactly as it was.
+    if ((flags_ & CRAT_IOLINK_FLAGS_ENABLED) == 0) {
+        return 0;
+    }
+
+    const uint32_t link_flags = flags_;
+
+    // 1 when the bit is set, 0 when it is clear.
+    auto has_flag = [link_flags](uint32_t mask) -> uint8_t {
+        return static_cast<uint8_t>((link_flags & mask) != 0 ? 1 : 0);
+    };
+    // The NON_/NO_ bits mark a missing feature, so the capability is the
+    // inverse of the bit: 1 when clear, 0 when set.
+    auto lacks_flag = [link_flags](uint32_t mask) -> uint8_t {
+        return static_cast<uint8_t>((link_flags & mask) != 0 ? 0 : 1);
+    };
+
+    link_cap_.is_iolink_coherent = lacks_flag(CRAT_IOLINK_FLAGS_NON_COHERENT);
+    link_cap_.is_iolink_atomics_32bit = lacks_flag(CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT);
+    link_cap_.is_iolink_atomics_64bit = lacks_flag(CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT);
+    link_cap_.is_iolink_dma = lacks_flag(CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA);
+    link_cap_.is_iolink_bi_directional = has_flag(CRAT_IOLINK_FLAGS_BI_DIRECTIONAL);
+
+    return 0;
+}
+
 }  // namespace smi
 }  // namespace amd
@@ -1053,6 +1053,26 @@ amdsmi_is_P2P_accessible(amdsmi_processor_handle processor_handle_src,
    return amd::smi::rsmi_to_amdsmi_status(rstatus);
 }

+// Resolves both processor handles to GPU devices and forwards the query to
+// rsmi_topo_get_p2p_status(), translating the result to an amdsmi status.
+amdsmi_status_t
+amdsmi_topo_get_p2p_status(amdsmi_processor_handle processor_handle_src,
+                           amdsmi_processor_handle processor_handle_dst,
+                           amdsmi_io_link_type_t *type, amdsmi_p2p_capability_t *cap) {
+    AMDSMI_CHECK_INIT();
+
+    // Source handle is resolved first; the first failure is returned as-is.
+    amd::smi::AMDSmiGPUDevice* gpu_src = nullptr;
+    amd::smi::AMDSmiGPUDevice* gpu_dst = nullptr;
+    amdsmi_status_t status = get_gpu_device_from_handle(processor_handle_src, &gpu_src);
+    if (status != AMDSMI_STATUS_SUCCESS) {
+        return status;
+    }
+
+    status = get_gpu_device_from_handle(processor_handle_dst, &gpu_dst);
+    if (status != AMDSMI_STATUS_SUCCESS) {
+        return status;
+    }
+
+    // The amdsmi output types are handed to the rsmi call through pointer casts.
+    const auto rsmi_ret = rsmi_topo_get_p2p_status(
+        gpu_src->get_gpu_id(), gpu_dst->get_gpu_id(),
+        reinterpret_cast<RSMI_IO_LINK_TYPE*>(type),
+        reinterpret_cast<rsmi_p2p_capability_t*>(cap));
+
+    return amd::smi::rsmi_to_amdsmi_status(rsmi_ret);
+}
+
 // Compute Partition functions
 amdsmi_status_t
 amdsmi_get_gpu_compute_partition(amdsmi_processor_handle processor_handle,
@@ -60,6 +60,7 @@ typedef struct {
  uint64_t hops;
  uint64_t weight;
  bool accessible;
+  amdsmi_p2p_capability_t cap;
 } gpu_link_t;

 TestHWTopologyRead::TestHWTopologyRead() : TestBase() {
@@ -136,9 +137,11 @@ void TestHWTopologyRead::Run(void) {
        gpu_links[dv_ind_src][dv_ind_dst].hops = 0;
        gpu_links[dv_ind_src][dv_ind_dst].weight = 0;
        gpu_links[dv_ind_src][dv_ind_dst].accessible = true;
+        gpu_links[dv_ind_src][dv_ind_dst].cap =
+          {UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX};
      } else {
        amdsmi_io_link_type_t type;
-        err = amdsmi_topo_get_link_type(processor_handles_[dv_ind_src], 
+        err = amdsmi_topo_get_link_type(processor_handles_[dv_ind_src],
                processor_handles_[dv_ind_dst],
                &gpu_links[dv_ind_src][dv_ind_dst].hops, &type);
        if (err != AMDSMI_STATUS_SUCCESS) {
@@ -170,6 +173,34 @@ void TestHWTopologyRead::Run(void) {
              }
          }
        }
+        err = amdsmi_topo_get_p2p_status(processor_handles_[dv_ind_src],
+                processor_handles_[dv_ind_dst],
+                &type, &gpu_links[dv_ind_src][dv_ind_dst].cap);
+        if (err != AMDSMI_STATUS_SUCCESS) {
+          if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
+            IF_VERB(STANDARD) {
+              std::cout <<
+                  "\t**Link Type. read: Not supported on this machine"
+                                                                 << std::endl;
+              return;
+            }
+          } else {
+            CHK_ERR_ASRT(err)
+          }
+        } else {
+          switch (type) {
+            case AMDSMI_IOLINK_TYPE_PCIEXPRESS:
+            case AMDSMI_IOLINK_TYPE_XGMI:
+              // Do nothing, the type is printed by the previous test for amdsmi_topo_get_link_type
+              break;
+            default:
+              gpu_links[dv_ind_src][dv_ind_dst].type = "XXXX";
+              IF_VERB(STANDARD) {
+                std::cout << "\t**Invalid IO LINK type. type=" << type <<
+                                                                    std::endl;
+              }
+          }
+        }
        err = amdsmi_topo_get_link_weight(processor_handles_[dv_ind_src],
                    processor_handles_[dv_ind_dst],
                                   &gpu_links[dv_ind_src][dv_ind_dst].weight);
@@ -286,6 +317,7 @@ void TestHWTopologyRead::Run(void) {
    std::cout << std::endl;
  }
  std::cout << std::endl;
+
  std::cout << "**Access between two GPUs**" << std::endl;
  std::cout << "      ";
  for (i = 0; i < num_devices; ++i) {
@@ -303,4 +335,125 @@ void TestHWTopologyRead::Run(void) {
    std::cout << std::endl;
  }
  std::cout << std::endl;
+
+  std::cout << "**Cache coherency between two GPUs**" << std::endl;
+  std::cout << "      ";
+  for (i = 0; i < num_devices; ++i) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(12) << std::left << tmp;
+  }
+  std::cout << std::endl;
+  for (i = 0; i < num_devices; i++) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(6) << std::left << tmp;
+    for (j = 0; j < num_devices; j++) {
+      if (i == j) {
+        std::cout << std::setw(12) << std::left << "X";
+        continue;
+      }
+
+      if (gpu_links[i][j].cap.is_iolink_coherent == UINT8_MAX) {
+        std::cout << std::setw(12) << std::left << "N/A";
+        continue;
+      }
+
+      std::cout << std::setw(12) << std::left
+                << (gpu_links[i][j].cap.is_iolink_coherent ? "C" : "NC");
+    }
+    std::cout << std::endl;
+  }
+  std::cout << std::endl;
+
+  std::cout << "**Atomics between two GPUs**" << std::endl;
+  std::cout << "      ";
+  for (i = 0; i < num_devices; ++i) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(12) << std::left << tmp;
+  }
+  std::cout << std::endl;
+  for (i = 0; i < num_devices; i++) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(6) << std::left << tmp;
+    for (j = 0; j < num_devices; j++) {
+      if (i == j) {
+        std::cout << std::setw(12) << std::left << "X";
+        continue;
+      }
+
+      if (gpu_links[i][j].cap.is_iolink_atomics_64bit == UINT8_MAX ||
+          gpu_links[i][j].cap.is_iolink_atomics_32bit == UINT8_MAX) {
+        std::cout << std::setw(12) << std::left << "N/A";
+        continue;
+      }
+
+      tmp = gpu_links[i][j].cap.is_iolink_atomics_64bit ? "64" : "";
+      if (gpu_links[i][j].cap.is_iolink_atomics_32bit) {
+        if (!tmp.empty()) {
+          tmp += ",";
+        }
+        tmp += "32";
+      }
+      std::cout << std::setw(12) << std::left << (tmp.empty() ? "N/A" : tmp);
+    }
+    std::cout << std::endl;
+  }
+  std::cout << std::endl;
+
+  std::cout << "**DMA between two GPUs**" << std::endl;
+  std::cout << "      ";
+  for (i = 0; i < num_devices; ++i) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(12) << std::left << tmp;
+  }
+  std::cout << std::endl;
+  for (i = 0; i < num_devices; i++) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(6) << std::left << tmp;
+    for (j = 0; j < num_devices; j++) {
+      if (i == j) {
+        std::cout << std::setw(12) << std::left << "X";
+        continue;
+      }
+
+      if (gpu_links[i][j].cap.is_iolink_dma == UINT8_MAX) {
+        std::cout << std::setw(12) << std::left << "N/A";
+        continue;
+      }
+
+      std::cout << std::boolalpha;
+      std::cout << std::setw(12) << std::left
+                << static_cast<bool>(gpu_links[i][j].cap.is_iolink_dma);
+    }
+    std::cout << std::endl;
+  }
+  std::cout << std::endl;
+
+  std::cout << "**BI-Directional between two GPUs**" << std::endl;
+  std::cout << "      ";
+  for (i = 0; i < num_devices; ++i) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(12) << std::left << tmp;
+  }
+  std::cout << std::endl;
+  for (i = 0; i < num_devices; i++) {
+    tmp = "GPU" + std::to_string(i);
+    std::cout << std::setw(6) << std::left << tmp;
+    for (j = 0; j < num_devices; j++) {
+      if (i == j) {
+        std::cout << std::setw(12) << std::left << "X";
+        continue;
+      }
+
+      // Decide "N/A" from the field this table prints, not from the DMA field.
+      if (gpu_links[i][j].cap.is_iolink_bi_directional == UINT8_MAX) {
+        std::cout << std::setw(12) << std::left << "N/A";
+        continue;
+      }
+
+      std::cout << std::boolalpha;
+      std::cout << std::setw(12) << std::left
+                << static_cast<bool>(gpu_links[i][j].cap.is_iolink_bi_directional);
+    }
+    std::cout << std::endl;
+  }
+  std::cout << std::endl;
 }