diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index 319015062c..87df0a929b 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -3389,6 +3389,30 @@ rsmi_status_t rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *hops, RSMI_IO_LINK_TYPE *type); +/** + * @brief Return P2P availability status between 2 GPUs + * + * @details Given a source device index @p dv_ind_src, + * a destination device index @p dv_ind_dst, and a pointer to a + * bool @p accessible, this function will write the P2P connection status + * between the device @p dv_ind_src and @p dv_ind_dst to the memory + * pointed to by @p accessible. + * + * @param[in] dv_ind_src the source device index + * + * @param[in] dv_ind_dst the destination device index + * + * @param[inout] accessible A pointer to a bool to which the status for + * the P2P connection availability should be written. + * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * + */ +rsmi_status_t +rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst, + bool *accessible); + /** @} */ // end of HWTopo /*****************************************************************************/ diff --git a/include/rocm_smi/rocm_smi_io_link.h b/include/rocm_smi/rocm_smi_io_link.h index 124fc5ec02..6ba58d09bd 100755 --- a/include/rocm_smi/rocm_smi_io_link.h +++ b/include/rocm_smi/rocm_smi_io_link.h @@ -77,10 +77,15 @@ typedef enum _IO_LINK_TYPE { IOLINK_TYPE_SIZE = 0xFFFFFFFF } IO_LINK_TYPE; +typedef enum _LINK_DIRECTORY_TYPE { + IO_LINK_DIRECTORY = 0, + P2P_LINK_DIRECTORY = 1 +} LINK_DIRECTORY_TYPE; + class IOLink { public: - explicit IOLink(uint32_t node_indx, uint32_t link_indx) : - node_indx_(node_indx), link_indx_(link_indx) {} + explicit IOLink(uint32_t node_indx, uint32_t link_indx, LINK_DIRECTORY_TYPE link_dir_type) : + node_indx_(node_indx), link_indx_(link_indx),
link_dir_type_(link_dir_type) {} ~IOLink(); int Initialize(); @@ -92,6 +97,7 @@ class IOLink { uint32_t node_from(void) const {return node_from_;} uint32_t node_to(void) const {return node_to_;} uint64_t weight(void) const {return weight_;} + LINK_DIRECTORY_TYPE get_directory_type(void) const {return link_dir_type_;} private: uint32_t node_indx_; @@ -101,16 +107,25 @@ class IOLink { uint32_t node_to_; uint64_t weight_; std::map properties_; + LINK_DIRECTORY_TYPE link_dir_type_; }; int DiscoverIOLinksPerNode(uint32_t node_indx, std::map> *links); +int +DiscoverP2PLinksPerNode(uint32_t node_indx, std::map> *links); + int DiscoverIOLinks(std::map, std::shared_ptr> *links); +int +DiscoverP2PLinks(std::map, + std::shared_ptr> *links); + } // namespace smi } // namespace amd diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index ac9e9d86e3..e806992028 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -3525,6 +3525,62 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, CATCH } +rsmi_status_t +rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst, + bool *accessible) { + TRY + + uint32_t dv_ind = dv_ind_src; + GET_DEV_AND_KFDNODE_FROM_INDX + + if (accessible == nullptr) { + return RSMI_STATUS_INVALID_ARGS; + } + + uint32_t node_ind_src, node_ind_dst; + // Fetch the source and destination GPU node index + if (smi.get_node_index(dv_ind_src, &node_ind_src) || + smi.get_node_index(dv_ind_dst, &node_ind_dst)) { + *accessible = false; + return RSMI_STATUS_INVALID_ARGS; + } + // If source device is same as destination, return true + if (dv_ind_src == dv_ind_dst) { + *accessible = true; + return RSMI_STATUS_SUCCESS; + } + std::map> io_link_map_tmp; + std::map>::iterator it; + // Iterate over P2P links + if (DiscoverP2PLinksPerNode(node_ind_src, &io_link_map_tmp) == 0) { + for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++) { + if(it->first == node_ind_dst) { + *accessible = true; + return RSMI_STATUS_SUCCESS; + } + } + 
io_link_map_tmp.clear(); + } else { + *accessible = false; + return RSMI_STATUS_FILE_ERROR; + } + // Iterate over IO links + if (DiscoverIOLinksPerNode(node_ind_src, &io_link_map_tmp) == 0) { + for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++) { + if(it->first == node_ind_dst) { + *accessible = true; + return RSMI_STATUS_SUCCESS; + } + } + } else { + *accessible = false; + return RSMI_STATUS_FILE_ERROR; + } + *accessible = false; + return RSMI_STATUS_SUCCESS; + CATCH +} + enum iterator_handle_type { FUNC_ITER = 0, VARIANT_ITER, diff --git a/src/rocm_smi_io_link.cc b/src/rocm_smi_io_link.cc index 6908e4149e..fc28e380be 100755 --- a/src/rocm_smi_io_link.cc +++ b/src/rocm_smi_io_link.cc @@ -62,6 +62,7 @@ namespace amd { namespace smi { static const char *kKFDNodesPathRoot = "/sys/class/kfd/kfd/topology/nodes"; +static const char *kKFDLinkPath[] = {"io_links", "p2p_links"}; // IO Link Property strings static const char *kIOLinkPropTYPEStr = "type"; @@ -82,24 +83,31 @@ static bool is_number(const std::string &s) { return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit); } -static std::string IOLinkPathRoot(uint32_t node_indx) { - std::string io_link_path = kKFDNodesPathRoot; - io_link_path += '/'; - io_link_path += std::to_string(node_indx); - io_link_path += '/'; - io_link_path += "io_links"; - return io_link_path; +static std::string LinkPathRoot(uint32_t node_indx, + LINK_DIRECTORY_TYPE directory) { + std::string link_path_root = kKFDNodesPathRoot; + link_path_root += '/'; + link_path_root += std::to_string(node_indx); + link_path_root += '/'; + if (directory < sizeof(kKFDLinkPath)/sizeof(kKFDLinkPath[0])) { + link_path_root += kKFDLinkPath[directory]; + } else { + link_path_root = ""; + } + return link_path_root; } -static std::string IOLinkPath(uint32_t node_indx, uint32_t link_indx) { - std::string io_link_path = IOLinkPathRoot(node_indx); - io_link_path += '/'; - io_link_path += std::to_string(link_indx); - return io_link_path; +static 
std::string LinkPath(uint32_t node_indx, uint32_t link_indx, + LINK_DIRECTORY_TYPE directory) { + std::string link_path = LinkPathRoot(node_indx, directory); + link_path += '/'; + link_path += std::to_string(link_indx); + return link_path; } -static int OpenIOLinkProperties(uint32_t node_indx, uint32_t link_indx, - std::ifstream *fs) { +static int OpenLinkProperties(uint32_t node_indx, uint32_t link_indx, + std::ifstream *fs, + LINK_DIRECTORY_TYPE directory) { int ret; std::string f_path; bool reg_file; @@ -109,7 +117,7 @@ static int OpenIOLinkProperties(uint32_t node_indx, uint32_t link_indx, return EINVAL; } - f_path = IOLinkPath(node_indx, link_indx); + f_path = LinkPath(node_indx, link_indx, directory); f_path += "/"; f_path += "properties"; @@ -131,8 +139,9 @@ static int OpenIOLinkProperties(uint32_t node_indx, uint32_t link_indx, return 0; } -static int ReadIOLinkProperties(uint32_t node_indx, uint32_t link_indx, - std::vector *retVec) { +static int ReadLinkProperties(uint32_t node_indx, uint32_t link_indx, + std::vector *retVec, + LINK_DIRECTORY_TYPE directory) { std::string line; int ret; std::ifstream fs; @@ -142,7 +151,7 @@ static int ReadIOLinkProperties(uint32_t node_indx, uint32_t link_indx, return EINVAL; } - ret = OpenIOLinkProperties(node_indx, link_indx, &fs); + ret = OpenLinkProperties(node_indx, link_indx, &fs, directory); if (ret) { return ret; @@ -166,8 +175,9 @@ static int ReadIOLinkProperties(uint32_t node_indx, uint32_t link_indx, return 0; } -int DiscoverIOLinks(std::map, - std::shared_ptr> *links) { +static int DiscoverLinks(std::map, + std::shared_ptr> *links, + LINK_DIRECTORY_TYPE directory) { assert(links != nullptr); if (links == nullptr) { return EINVAL; @@ -201,9 +211,9 @@ int DiscoverIOLinks(std::map, uint32_t node_indx = static_cast(std::stoi(dentry_kfd->d_name)); std::shared_ptr link; uint32_t link_indx; - std::string io_link_path_root = IOLinkPathRoot(node_indx); + std::string link_path_root = LinkPathRoot(node_indx, directory); 
- auto io_link_dir = opendir(io_link_path_root.c_str()); + auto io_link_dir = opendir(link_path_root.c_str()); assert(io_link_dir != nullptr); auto dentry_io_link = readdir(io_link_dir); @@ -219,7 +229,8 @@ int DiscoverIOLinks(std::map, } link_indx = static_cast(std::stoi(dentry_io_link->d_name)); - link = std::shared_ptr(new IOLink(node_indx, link_indx)); + link = std::shared_ptr(new IOLink(node_indx, link_indx, + directory)); link->Initialize(); @@ -245,8 +256,19 @@ int DiscoverIOLinks(std::map, return 0; } -int DiscoverIOLinksPerNode(uint32_t node_indx, std::map> *links) { +int DiscoverIOLinks(std::map, + std::shared_ptr> *links) { + return DiscoverLinks(links, IO_LINK_DIRECTORY); +} + +int DiscoverP2PLinks(std::map, + std::shared_ptr> *links) { + return DiscoverLinks(links, P2P_LINK_DIRECTORY); +} + +static int DiscoverLinksPerNode(uint32_t node_indx, std::map> *links, + LINK_DIRECTORY_TYPE directory) { assert(links != nullptr); if (links == nullptr) { return EINVAL; @@ -257,9 +279,9 @@ int DiscoverIOLinksPerNode(uint32_t node_indx, std::map link; uint32_t link_indx; - std::string io_link_path_root = IOLinkPathRoot(node_indx); + std::string link_path_root = LinkPathRoot(node_indx, directory); - auto io_link_dir = opendir(io_link_path_root.c_str()); + auto io_link_dir = opendir(link_path_root.c_str()); assert(io_link_dir != nullptr); auto dentry = readdir(io_link_dir); @@ -275,7 +297,8 @@ int DiscoverIOLinksPerNode(uint32_t node_indx, std::map(std::stoi(dentry->d_name)); - link = std::shared_ptr(new IOLink(node_indx, link_indx)); + link = std::shared_ptr(new IOLink(node_indx, link_indx, + directory)); link->Initialize(); @@ -290,6 +313,16 @@ int DiscoverIOLinksPerNode(uint32_t node_indx, std::map> *links) { + return DiscoverLinksPerNode(node_indx, links, IO_LINK_DIRECTORY); +} + +int DiscoverP2PLinksPerNode(uint32_t node_indx, std::map> *links) { + return DiscoverLinksPerNode(node_indx, links, P2P_LINK_DIRECTORY); +} + IOLink::~IOLink() { } @@ -303,7 +336,8 @@ 
int IOLink::ReadProperties(void) { return 0; } - ret = ReadIOLinkProperties(node_indx_, link_indx_, &propVec); + ret = ReadLinkProperties(node_indx_, link_indx_, &propVec, + link_dir_type_); if (ret) { return ret;