From 33e8919743dce8b4d4af0bfcfe4e8dbeb964a308 Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Fri, 24 Jun 2022 23:33:27 -0500 Subject: [PATCH] Add hwloc5 dev headers to rocrtst. Allows easy building on platforms without native hwloc v1 support. Change-Id: I20d711f914d176decb1b64381fd4b51ccc4262b5 --- rocrtst/suites/test_common/CMakeLists.txt | 7 +- rocrtst/thirdparty/{ => include}/LICENSE | 0 rocrtst/thirdparty/include/hwloc.h | 2594 +++++++++++++++++ .../thirdparty/include/hwloc/autogen/config.h | 202 ++ rocrtst/thirdparty/include/hwloc/bitmap.h | 382 +++ rocrtst/thirdparty/include/hwloc/cpuset.h | 75 + rocrtst/thirdparty/include/hwloc/cuda.h | 224 ++ rocrtst/thirdparty/include/hwloc/cudart.h | 184 ++ rocrtst/thirdparty/include/hwloc/deprecated.h | 102 + rocrtst/thirdparty/include/hwloc/diff.h | 299 ++ rocrtst/thirdparty/include/hwloc/gl.h | 135 + .../thirdparty/include/hwloc/glibc-sched.h | 125 + rocrtst/thirdparty/include/hwloc/helper.h | 1311 +++++++++ rocrtst/thirdparty/include/hwloc/inlines.h | 154 + rocrtst/thirdparty/include/hwloc/intel-mic.h | 143 + .../thirdparty/include/hwloc/linux-libnuma.h | 355 +++ rocrtst/thirdparty/include/hwloc/linux.h | 77 + .../thirdparty/include/hwloc/myriexpress.h | 127 + rocrtst/thirdparty/include/hwloc/nvml.h | 176 ++ rocrtst/thirdparty/include/hwloc/opencl.h | 200 ++ .../include/hwloc/openfabrics-verbs.h | 157 + rocrtst/thirdparty/include/hwloc/plugins.h | 439 +++ rocrtst/thirdparty/include/hwloc/rename.h | 705 +++++ rocrtst/thirdparty/lib/LICENSE | 38 + rocrtst/thirdparty/{ => lib}/libhwloc.so.5 | Bin 25 files changed, 8208 insertions(+), 3 deletions(-) rename rocrtst/thirdparty/{ => include}/LICENSE (100%) create mode 100644 rocrtst/thirdparty/include/hwloc.h create mode 100644 rocrtst/thirdparty/include/hwloc/autogen/config.h create mode 100644 rocrtst/thirdparty/include/hwloc/bitmap.h create mode 100644 rocrtst/thirdparty/include/hwloc/cpuset.h create mode 100644 rocrtst/thirdparty/include/hwloc/cuda.h create mode 100644 rocrtst/thirdparty/include/hwloc/cudart.h create mode 100644 rocrtst/thirdparty/include/hwloc/deprecated.h create mode 100644 rocrtst/thirdparty/include/hwloc/diff.h create mode 100644 rocrtst/thirdparty/include/hwloc/gl.h create mode 100644 rocrtst/thirdparty/include/hwloc/glibc-sched.h create mode 100644 rocrtst/thirdparty/include/hwloc/helper.h create mode 100644 rocrtst/thirdparty/include/hwloc/inlines.h create mode 100644 rocrtst/thirdparty/include/hwloc/intel-mic.h create mode 100644 rocrtst/thirdparty/include/hwloc/linux-libnuma.h create mode 100644 rocrtst/thirdparty/include/hwloc/linux.h create mode 100644 rocrtst/thirdparty/include/hwloc/myriexpress.h create mode 100644 rocrtst/thirdparty/include/hwloc/nvml.h create mode 100644 rocrtst/thirdparty/include/hwloc/opencl.h create mode 100644 rocrtst/thirdparty/include/hwloc/openfabrics-verbs.h create mode 100644 rocrtst/thirdparty/include/hwloc/plugins.h create mode 100644 rocrtst/thirdparty/include/hwloc/rename.h create mode 100644 rocrtst/thirdparty/lib/LICENSE rename rocrtst/thirdparty/{ => lib}/libhwloc.so.5 (100%) diff --git a/rocrtst/suites/test_common/CMakeLists.txt b/rocrtst/suites/test_common/CMakeLists.txt index d71d75a57a..1151324c06 100755 --- a/rocrtst/suites/test_common/CMakeLists.txt +++ b/rocrtst/suites/test_common/CMakeLists.txt @@ -243,6 +243,7 @@ aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources) include_directories(${ROCRTST_ROOT}) include_directories(${ROCRTST_ROOT}/gtest/include) +include_directories(${ROCRTST_ROOT}/thirdparty/include/) # Custom command set for code objects. set (HSACO_TARG_LIST "") @@ -349,7 +350,7 @@ set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources} ${common_srcs} ${testCommonSources}) -target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt numa ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/libhwloc.so.5) +target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt numa ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/lib/libhwloc.so.5) #Build kernels add_custom_target(rocrtst_kernels ALL DEPENDS ${HSACO_TARG_LIST}) @@ -358,7 +359,7 @@ add_custom_target(rocrtst_kernels ALL DEPENDS ${HSACO_TARG_LIST}) add_custom_target(rocrtst_links ALL DEPENDS ${ROCRTST_LINKS_LIST} ) ## Set RUNPATH to pickup local copy of hwloc -set_property(TARGET ${ROCRTST} PROPERTY INSTALL_RPATH "$ORIGIN;$ORIGIN/thirdparty;$ORIGIN/../lib/rocrtst/thirdparty" ) +set_property(TARGET ${ROCRTST} PROPERTY INSTALL_RPATH "$ORIGIN;$ORIGIN/thirdparty/lib;$ORIGIN/../lib/rocrtst/thirdparty/lib" ) set_property(TARGET ${ROCRTST} PROPERTY LINK_FLAGS "-Wl,--enable-new-dtags") install(TARGETS ${ROCRTST} @@ -366,6 +367,6 @@ install(TARGETS ${ROCRTST} LIBRARY DESTINATION lib RUNTIME DESTINATION bin) -install ( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty DESTINATION lib/rocrtst ) +install ( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/lib DESTINATION lib/rocrtst ) include ( CPack ) diff --git a/rocrtst/thirdparty/LICENSE b/rocrtst/thirdparty/include/LICENSE similarity index 100% rename from rocrtst/thirdparty/LICENSE rename to rocrtst/thirdparty/include/LICENSE diff --git a/rocrtst/thirdparty/include/hwloc.h b/rocrtst/thirdparty/include/hwloc.h new file mode 100644 index 0000000000..8c77865b0d --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc.h @@ -0,0 +1,2594 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/*===================================================================== + * PLEASE GO READ THE DOCUMENTATION! + * ------------------------------------------------ + * $tarball_directory/doc/doxygen-doc/ + * or + * http://www.open-mpi.org/projects/hwloc/doc/ + *===================================================================== + * + * FAIR WARNING: Do NOT expect to be able to figure out all the + * subtleties of hwloc by simply reading function prototypes and + * constant descrptions here in this file. + * + * Hwloc has wonderful documentation in both PDF and HTML formats for + * your reading pleasure. The formal documentation explains a LOT of + * hwloc-specific concepts, provides definitions, and discusses the + * "big picture" for many of the things that you'll find here in this + * header file. + * + * The PDF/HTML documentation was generated via Doxygen; much of what + * you'll see in there is also here in this file. BUT THERE IS A LOT + * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h! + * + * There are entire paragraph-length descriptions, discussions, and + * pretty prictures to explain subtle corner cases, provide concrete + * examples, etc. + * + * Please, go read the documentation. :-) + * + * Moreover there are several examples of hwloc use under doc/examples + * in the source tree. + * + *=====================================================================*/ + +/** \file + * \brief The hwloc API. + * + * See hwloc/bitmap.h for bitmap specific macros. + * See hwloc/helper.h for high-level topology traversal helpers. + * See hwloc/inlines.h for the actual inline code of some functions below. + */ + +#ifndef HWLOC_H +#define HWLOC_H + +#include +#include +#include +#include +#include + +/* + * Symbol transforms + */ +#include + +/* + * Bitmap definitions + */ + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_api_version API version + * @{ + */ + +/** \brief Indicate at build time which hwloc API version is being used. + * + * This number is updated to (X>>16)+(Y>>8)+Z when a new release X.Y.Z + * actually modifies the API. + * + * Users may check for available features at build time using this number + * (see \ref faq_upgrade). + */ +#define HWLOC_API_VERSION 0x00010b06 + +/** \brief Indicate at runtime which hwloc API version was used at build time. + * + * Should be ::HWLOC_API_VERSION if running on the same version. + */ +HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); + +/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ +#define HWLOC_COMPONENT_ABI 4 + +/** @} */ + + + +/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t) + * + * Hwloc uses bitmaps to represent two distinct kinds of object sets: + * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t). + * These types are both typedefs to a common back end type + * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions + * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see + * \ref hwlocality_bitmap). + * + * The rationale for having two different types is that even though + * the actions one wants to perform on these types are the same (e.g., + * enable and disable individual items in the set/mask), they're used + * in very different contexts: one for specifying which processors to + * use and one for specifying which NUMA nodes to use. Hence, the + * name difference is really just to reflect the intent of where the + * type is used. + * + * @{ + */ + +/** \brief A CPU set is a bitmap whose bits are set according to CPU + * physical OS indexes. + * + * It may be consulted and modified with the bitmap API as any + * ::hwloc_bitmap_t (see hwloc/bitmap.h). + * + * Each bit may be converted into a PU object using + * hwloc_get_pu_obj_by_os_index(). + */ +typedef hwloc_bitmap_t hwloc_cpuset_t; +/** \brief A non-modifiable ::hwloc_cpuset_t. */ +typedef hwloc_const_bitmap_t hwloc_const_cpuset_t; + +/** \brief A node set is a bitmap whose bits are set according to NUMA + * memory node physical OS indexes. + * + * It may be consulted and modified with the bitmap API as any + * ::hwloc_bitmap_t (see hwloc/bitmap.h). + * Each bit may be converted into a NUMA node object using + * hwloc_get_numanode_obj_by_os_index(). + * + * When binding memory on a system without any NUMA node + * (when the whole memory is considered as a single memory bank), + * the nodeset may be either empty (no memory selected) + * or full (whole system memory selected). + * + * See also \ref hwlocality_helper_nodeset_convert. + */ +typedef hwloc_bitmap_t hwloc_nodeset_t; +/** \brief A non-modifiable ::hwloc_nodeset_t. + */ +typedef hwloc_const_bitmap_t hwloc_const_nodeset_t; + +/** @} */ + + + +/** \defgroup hwlocality_object_types Object Types + * @{ + */ + +/** \brief Type of topology object. + * + * \note Do not rely on the ordering or completeness of the values as new ones + * may be defined in the future! If you need to compare types, use + * hwloc_compare_types() instead. + */ +typedef enum { + /* *************************************************************** + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + + If new enum values are added here, you MUST also go update the + obj_type_order[] and obj_order_type[] arrays in src/topology.c. + + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + *************************************************************** */ + + HWLOC_OBJ_SYSTEM, /**< \brief Whole system (may be a cluster of machines). + * The whole system that is accessible to hwloc. + * That may comprise several machines in SSI systems + * like Kerrighed. + */ + HWLOC_OBJ_MACHINE, /**< \brief Machine. + * The typical root object type. + * A set of processors and memory with cache + * coherency. + */ + HWLOC_OBJ_NUMANODE, /**< \brief NUMA node. + * An object that contains memory that is directly + * and byte-accessible to the host processors. + * It is usually close to some cores (the corresponding objects + * are descendants of the NUMA node object in the hwloc tree). + * + * There is always at one such object in the topology + * even if the machine is not NUMA. + */ + HWLOC_OBJ_PACKAGE, /**< \brief Physical package. + * The physical package that usually gets inserted + * into a socket on the motherboard. + * A processor package usually contains multiple cores. + */ + HWLOC_OBJ_CACHE, /**< \brief Cache. + * Can be L1i, L1d, L2, L3, ... + */ + HWLOC_OBJ_CORE, /**< \brief Core. + * A computation unit (may be shared by several + * logical processors). + */ + HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor. + * An execution unit (may share a core with some + * other logical processors, e.g. in the case of + * an SMT core). + * + * Objects of this kind are always reported and can + * thus be used as fallback when others are not. + */ + + HWLOC_OBJ_GROUP, /**< \brief Group objects. + * Objects which do not fit in the above but are + * detected by hwloc and are useful to take into + * account for affinity. For instance, some operating systems + * expose their arbitrary processors aggregation this + * way. And hwloc may insert such objects to group + * NUMA nodes according to their distances. + * See also \ref faq_groups. + * + * These objects are ignored when they do not bring + * any structure. + */ + + HWLOC_OBJ_MISC, /**< \brief Miscellaneous objects. + * Objects without particular meaning, that can e.g. be + * added by the application for its own use, or by hwloc + * for miscellaneous objects such as MemoryModule (DIMMs). + */ + + HWLOC_OBJ_BRIDGE, /**< \brief Bridge. + * Any bridge that connects the host or an I/O bus, + * to another I/O bus. + * Bridge objects have neither CPU sets nor node sets. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + */ + HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device. + * These objects have neither CPU sets nor node sets. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + */ + HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device. + * These objects have neither CPU sets nor node sets. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + */ + + HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ + + /* *************************************************************** + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + + If new enum values are added here, you MUST also go update the + obj_type_order[] and obj_order_type[] arrays in src/topology.c. + + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + *************************************************************** */ +} hwloc_obj_type_t; + +/** \brief Cache type. */ +typedef enum hwloc_obj_cache_type_e { + HWLOC_OBJ_CACHE_UNIFIED, /**< \brief Unified cache. */ + HWLOC_OBJ_CACHE_DATA, /**< \brief Data cache. */ + HWLOC_OBJ_CACHE_INSTRUCTION /**< \brief Instruction cache. + * Only used when the ::HWLOC_TOPOLOGY_FLAG_ICACHES topology flag is set. */ +} hwloc_obj_cache_type_t; + +/** \brief Type of one side (upstream or downstream) of an I/O bridge. */ +typedef enum hwloc_obj_bridge_type_e { + HWLOC_OBJ_BRIDGE_HOST, /**< \brief Host-side of a bridge, only possible upstream. */ + HWLOC_OBJ_BRIDGE_PCI /**< \brief PCI-side of a bridge. */ +} hwloc_obj_bridge_type_t; + +/** \brief Type of a OS device. */ +typedef enum hwloc_obj_osdev_type_e { + HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device. + * For instance "sda" on Linux. */ + HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device. + * For instance ":0.0" for a GL display, + * "card0" for a Linux DRM device. */ + HWLOC_OBJ_OSDEV_NETWORK, /**< \brief Operating system network device. + * For instance the "eth0" interface on Linux. */ + HWLOC_OBJ_OSDEV_OPENFABRICS, /**< \brief Operating system openfabrics device. + * For instance the "mlx4_0" InfiniBand HCA, + * or "hfi1_0" Omni-Path interface on Linux. */ + HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device. + * For instance the "dma0chan0" DMA channel on Linux. */ + HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device. + * For instance "mic0" for a Xeon Phi (MIC) on Linux, + * "opencl0d0" for a OpenCL device, + * "cuda0" for a CUDA device. */ +} hwloc_obj_osdev_type_t; + +/** \brief Compare the depth of two object types + * + * Types shouldn't be compared as they are, since newer ones may be added in + * the future. This function returns less than, equal to, or greater than zero + * respectively if \p type1 objects usually include \p type2 objects, are the + * same as \p type2 objects, or are included in \p type2 objects. If the types + * can not be compared (because neither is usually contained in the other), + * ::HWLOC_TYPE_UNORDERED is returned. Object types containing CPUs can always + * be compared (usually, a system contains machines which contain nodes which + * contain packages which contain caches, which contain cores, which contain + * processors). + * + * \note ::HWLOC_OBJ_PU will always be the deepest. + * \note This does not mean that the actual topology will respect that order: + * e.g. as of today cores may also contain caches, and packages may also contain + * nodes. This is thus just to be seen as a fallback comparison method. + */ +HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const; + +enum hwloc_compare_types_e { + HWLOC_TYPE_UNORDERED = INT_MAX /**< \brief Value returned by hwloc_compare_types() when types can not be compared. \hideinitializer */ +}; + +/** @} */ + + + +/** \defgroup hwlocality_objects Object Structure and Attributes + * @{ + */ + +union hwloc_obj_attr_u; + +/** \brief Object memory */ +struct hwloc_obj_memory_s { + hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in this object and its children */ + hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */ + + /** \brief Size of array \p page_types */ + unsigned page_types_len; + /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types is 0. + * + * The array is sorted by increasing \p size fields. + * It contains \p page_types_len slots. + */ + struct hwloc_obj_memory_page_type_s { + hwloc_uint64_t size; /**< \brief Size of pages */ + hwloc_uint64_t count; /**< \brief Number of pages of this size */ + } * page_types; +}; + +/** \brief Structure of a topology object + * + * Applications must not modify any field except hwloc_obj.userdata. + */ +struct hwloc_obj { + /* physical information */ + hwloc_obj_type_t type; /**< \brief Type of object */ + + unsigned os_index; /**< \brief OS-provided physical index number. + * It is not guaranteed unique across the entire machine, + * except for PUs and NUMA nodes. + */ + char *name; /**< \brief Object-specific name if any. + * Mostly used for identifying OS devices and Misc objects where + * a name string is more useful than numerical indexes. + */ + + struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */ + + union hwloc_obj_attr_u *attr; /**< \brief Object type-specific Attributes, + * may be \c NULL if no attribute value was found */ + + /* global position */ + unsigned depth; /**< \brief Vertical index in the hierarchy. + * + * For normal objects, this is the depth of the horizontal level + * that contains this object and its cousins of the same type. + * If the topology is symmetric, this is equal to the parent depth + * plus one, and also equal to the number of parent/child links + * from the root object to here. + * + * For special objects (I/O and Misc) that are not + * in the main tree, this is a special negative value that + * corresponds to their dedicated level, + * see hwloc_get_type_depth() and ::hwloc_get_type_depth_e. + * Those special values can be passed to hwloc functions such + * hwloc_get_nbobjs_by_depth() as usual. + */ + unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects, + * hence guaranteed unique across the entire machine. + * Could be a "cousin_rank" since it's the rank within the "cousin" list below + */ + signed os_level; /**< \brief OS-provided physical level, -1 if unknown or meaningless */ + + /* cousins are all objects of the same type (and depth) across the entire topology */ + struct hwloc_obj *next_cousin; /**< \brief Next object of same type and depth */ + struct hwloc_obj *prev_cousin; /**< \brief Previous object of same type and depth */ + + /* children of the same parent are siblings, even if they may have different type and depth */ + struct hwloc_obj *parent; /**< \brief Parent, \c NULL if root (system object) */ + unsigned sibling_rank; /**< \brief Index in parent's \c children[] array */ + struct hwloc_obj *next_sibling; /**< \brief Next object below the same parent */ + struct hwloc_obj *prev_sibling; /**< \brief Previous object below the same parent */ + + /* children array below this object */ + unsigned arity; /**< \brief Number of children */ + struct hwloc_obj **children; /**< \brief Children, \c children[0 .. arity -1] */ + struct hwloc_obj *first_child; /**< \brief First child */ + struct hwloc_obj *last_child; /**< \brief Last child */ + + /* misc */ + void *userdata; /**< \brief Application-given private data pointer, + * initialized to \c NULL, use it as you wish. + * See hwloc_topology_set_userdata_export_callback() + * if you wish to export this field to XML. */ + + /* cpusets and nodesets */ + hwloc_cpuset_t cpuset; /**< \brief CPUs covered by this object + * + * This is the set of CPUs for which there are PU objects in the topology + * under this object, i.e. which are known to be physically contained in this + * object and known how (the children path between this object and the PU + * objects). + * + * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of + * these CPUs may be offline, or not allowed for binding, see online_cpuset + * and allowed_cpuset. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, + * + * This includes not only the same as the cpuset field, but also some CPUs for + * which topology information is unknown or incomplete, and the CPUs that are + * ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. + * Thus no corresponding PU object may be found in the topology, because the + * precise position is undefined. It is however known that it would be somewhere + * under this object. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t online_cpuset; /**< \brief The CPU set of online logical processors + * + * This includes the CPUs contained in this object that are online, i.e. draw + * power and can execute threads. It may however not be allowed to bind to + * them due to administration rules, see allowed_cpuset. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t allowed_cpuset; /**< \brief The CPU set of allowed logical processors + * + * This includes the CPUs contained in this object which are allowed for + * binding, i.e. passing them to the hwloc binding functions should not return + * permission errors. This is usually restricted by administration rules. + * Some of them may however be offline so binding to them may still not be + * possible, see online_cpuset. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + + hwloc_nodeset_t nodeset; /**< \brief NUMA nodes covered by this object or containing this object + * + * This is the set of NUMA nodes for which there are NUMA node objects in the + * topology under or above this object, i.e. which are known to be physically + * contained in this object or containing it and known how (the children path + * between this object and the NUMA node objects). + * + * In the end, these nodes are those that are close to the current object. + * + * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of + * these nodes may not be allowed for allocation, see allowed_nodeset. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so \p nodeset is full. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_nodeset_t complete_nodeset; /**< \brief The complete NUMA node set of this object, + * + * This includes not only the same as the nodeset field, but also some NUMA + * nodes for which topology information is unknown or incomplete, and the nodes + * that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. + * Thus no corresponding NUMA node object may be found in the topology, because the + * precise position is undefined. It is however known that it would be + * somewhere under this object. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so \p complete_nodeset is full. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_nodeset_t allowed_nodeset; /**< \brief The set of allowed NUMA memory nodes + * + * This includes the NUMA memory nodes contained in this object which are + * allowed for memory allocation, i.e. passing them to NUMA node-directed + * memory allocation should not return permission errors. This is usually + * restricted by administration rules. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so \p allowed_nodeset is full. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + + struct hwloc_distances_s **distances; /**< \brief Distances between all objects at same depth below this object */ + unsigned distances_count; + + struct hwloc_obj_info_s *infos; /**< \brief Array of stringified info type=name. */ + unsigned infos_count; /**< \brief Size of infos array. */ + + int symmetric_subtree; /**< \brief Set if the subtree of objects below this object is symmetric, + * which means all children and their children have identical subtrees. + * If set in the topology root object, lstopo may export the topology + * as a synthetic string. + */ +}; +/** + * \brief Convenience typedef; a pointer to a struct hwloc_obj. + */ +typedef struct hwloc_obj * hwloc_obj_t; + +/** \brief Object type-specific Attributes */ +union hwloc_obj_attr_u { + /** \brief Cache-specific Object Attributes */ + struct hwloc_cache_attr_s { + hwloc_uint64_t size; /**< \brief Size of cache in bytes */ + unsigned depth; /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */ + unsigned linesize; /**< \brief Cache-line size in bytes. 0 if unknown */ + int associativity; /**< \brief Ways of associativity, + * -1 if fully associative, 0 if unknown */ + hwloc_obj_cache_type_t type; /**< \brief Cache type */ + } cache; + /** \brief Group-specific Object Attributes */ + struct hwloc_group_attr_s { + unsigned depth; /**< \brief Depth of group object */ + } group; + /** \brief PCI Device specific Object Attributes */ + struct hwloc_pcidev_attr_s { + unsigned short domain; + unsigned char bus, dev, func; + unsigned short class_id; + unsigned short vendor_id, device_id, subvendor_id, subdevice_id; + unsigned char revision; + float linkspeed; /* in GB/s */ + } pcidev; + /** \brief Bridge specific Object Attribues */ + struct hwloc_bridge_attr_s { + union { + struct hwloc_pcidev_attr_s pci; + } upstream; + hwloc_obj_bridge_type_t upstream_type; + union { + struct { + unsigned short domain; + unsigned char secondary_bus, subordinate_bus; + } pci; + } downstream; + hwloc_obj_bridge_type_t downstream_type; + unsigned depth; + } bridge; + /** \brief OS Device specific Object Attributes */ + struct hwloc_osdev_attr_s { + hwloc_obj_osdev_type_t type; + } osdev; +}; + +/** \brief Distances between objects + * + * One object may contain a distance structure describing distances + * between all its descendants at a given relative depth. If the + * containing object is the root object of the topology, then the + * distances are available for all objects in the machine. + * + * If the \p latency pointer is not \c NULL, the pointed array contains + * memory latencies (non-zero values), see below. + * + * In the future, some other types of distances may be considered. + * In these cases, \p latency may be \c NULL. + */ +struct hwloc_distances_s { + unsigned relative_depth; /**< \brief Relative depth of the considered objects + * below the object containing this distance information. */ + unsigned nbobjs; /**< \brief Number of objects considered in the matrix. + * It is the number of descendant objects at \p relative_depth + * below the containing object. + * It corresponds to the result of hwloc_get_nbobjs_inside_cpuset_by_depth(). */ + + float *latency; /**< \brief Matrix of latencies between objects, stored as a one-dimension array. + * May be \c NULL if the distances considered here are not latencies. + * + * Unless defined by the user, this currently contains latencies + * between NUMA nodes (as reported in the System Locality Distance Information Table + * (SLIT) in the ACPI specification), which may or may not be accurate. + * It corresponds to the latency for accessing the memory of one node + * from a core in another node. + * + * Values are normalized to get 1.0 as the minimal value in the matrix. + * Latency from i-th to j-th object is stored in slot i*nbobjs+j. + */ + float latency_max; /**< \brief The maximal value in the latency matrix. */ + float latency_base; /**< \brief The multiplier that should be applied to latency matrix + * to retrieve the original OS-provided latencies. + * Usually 10 on Linux since ACPI SLIT uses 10 for local latency. + */ +}; + +/** \brief Object info + * + * \sa hwlocality_info_attr + */ +struct hwloc_obj_info_s { + char *name; /**< \brief Info name */ + char *value; /**< \brief Info value */ +}; + +/** @} */ + + + +/** \defgroup hwlocality_creation Topology Creation and Destruction + * @{ + */ + +struct hwloc_topology; +/** \brief Topology context + * + * To be initialized with hwloc_topology_init() and built with hwloc_topology_load(). + */ +typedef struct hwloc_topology * hwloc_topology_t; + +/** \brief Allocate a topology context. + * + * \param[out] topologyp is assigned a pointer to the new allocated context. + * + * \return 0 on success, -1 on error. + */ +HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp); + +/** \brief Build the actual topology + * + * Build the actual topology once initialized with hwloc_topology_init() and + * tuned with \ref hwlocality_configuration routines. + * No other routine may be called earlier using this topology context. + * + * \param topology is the topology to be loaded with objects. + * + * \return 0 on success, -1 on error. + * + * \note On failure, the topology is reinitialized. It should be either + * destroyed with hwloc_topology_destroy() or configured and loaded again. + * + * \note This function may be called only once per topology. + * + * \note The binding of the current thread or process may temporarily change + * during this call but it will be restored before it returns. + * + * \sa hwlocality_configuration + */ +HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology); + +/** \brief Terminate and free a topology context + * + * \param topology is the topology to be freed + */ +HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology); + +/** \brief Duplicate a topology. + * + * The entire topology structure as well as its objects + * are duplicated into a new one. + * + * This is useful for keeping a backup while modifying a topology. + * + * \note Object userdata is not duplicated since hwloc does not know what it point to. + * The objects of both old and new topologies will point to the same userdata. + */ +HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology); + +/** \brief Run internal checks on a topology structure + * + * The program aborts if an inconsistency is detected in the given topology. + * + * \param topology is the topology to be checked + * + * \note This routine is only useful to developers. + * + * \note The input topology should have been previously loaded with + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology); + +/** @} */ + + + +/** \defgroup hwlocality_configuration Topology Detection Configuration and Query + * + * Several functions can optionally be called between hwloc_topology_init() and + * hwloc_topology_load() to configure how the detection should be performed, + * e.g. to ignore some objects types, define a synthetic topology, etc. + * + * If none of them is called, the default is to detect all the objects of the + * machine that the caller is allowed to access. + * + * This default behavior may also be modified through environment variables + * if the application did not modify it already. + * Setting HWLOC_XMLFILE in the environment enforces the discovery from a XML + * file as if hwloc_topology_set_xml() had been called. + * HWLOC_FSROOT switches to reading the topology from the specified Linux + * filesystem root as if hwloc_topology_set_fsroot() had been called. + * Finally, HWLOC_THISSYSTEM enforces the return value of + * hwloc_topology_is_thissystem(). + * + * @{ + */ + +/** \brief Ignore an object type. + * + * Ignore all objects from the given type. + * The bottom-level type ::HWLOC_OBJ_PU may not be ignored. + * The top-level object of the hierarchy will never be ignored, even if this function + * succeeds. + * Group objects are always ignored if they do not bring any structure + * since they are designed to add structure to the topology. + * I/O objects may not be ignored, topology flags should be used to configure + * their discovery instead. + */ +HWLOC_DECLSPEC int hwloc_topology_ignore_type(hwloc_topology_t topology, hwloc_obj_type_t type); + +/** \brief Ignore an object type if it does not bring any structure. + * + * Ignore all objects from the given type as long as they do not bring any structure: + * Each ignored object should have a single children or be the only child of its parent. + * The bottom-level type ::HWLOC_OBJ_PU may not be ignored. + * I/O objects may not be ignored, topology flags should be used to configure + * their discovery instead. + */ +HWLOC_DECLSPEC int hwloc_topology_ignore_type_keep_structure(hwloc_topology_t topology, hwloc_obj_type_t type); + +/** \brief Ignore all objects that do not bring any structure. + * + * Ignore all objects that do not bring any structure: + * This is equivalent to calling hwloc_topology_ignore_type_keep_structure() + * for all object types. + */ +HWLOC_DECLSPEC int hwloc_topology_ignore_all_keep_structure(hwloc_topology_t topology); + +/** \brief Flags to be set onto a topology context before load. + * + * Flags should be given to hwloc_topology_set_flags(). + * They may also be returned by hwloc_topology_get_flags(). + */ +enum hwloc_topology_flags_e { + /** \brief Detect the whole system, ignore reservations and offline settings. + * + * Gather all resources, even if some were disabled by the administrator. + * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes, + * and ignore the fact that some resources may be offline. + * + * When this flag is not set, PUs that are disallowed are not added to the topology. + * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. + * NUMA nodes are always added but their available memory is set to 0 when disallowed. + * + * If the current topology is exported to XML and reimported later, this flag + * should be set again in the reimported topology so that disallowed resources + * are reimported as well. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), + + /** \brief Assume that the selected backend provides the topology for the + * system on which we are running. + * + * This forces hwloc_topology_is_thissystem() to return 1, i.e. makes hwloc assume that + * the selected backend provides the topology for the system on which we are running, + * even if it is not the OS-specific backend but the XML backend for instance. + * This means making the binding functions actually call the OS-specific + * system calls and really do binding, while the XML backend would otherwise + * provide empty hooks just returning success. + * + * Setting the environment variable HWLOC_THISSYSTEM may also result in the + * same behavior. + * + * This can be used for efficiency reasons to first detect the topology once, + * save it to an XML file, and quickly reload it later through the XML + * backend, but still having binding functions actually do bind. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1), + + /** \brief Detect PCI devices. + * + * By default, I/O devices are ignored. This flag enables I/O device + * detection using the pci backend. Only the common PCI devices (GPUs, + * NICs, block devices, ...) and host bridges (objects that connect the host + * objects to an I/O subsystem) will be added to the topology. + * Additionally it also enables MemoryModule misc objects. + * Uncommon devices and other bridges (such as PCI-to-PCI bridges) will be + * ignored. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_IO_DEVICES = (1UL<<2), + + /** \brief Detect PCI bridges. + * + * This flag should be combined with ::HWLOC_TOPOLOGY_FLAG_IO_DEVICES to enable + * the detection of both common devices and of all useful bridges (bridges that + * have at least one device behind them). + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_IO_BRIDGES = (1UL<<3), + + /** \brief Detect the whole PCI hierarchy. + * + * This flag enables detection of all I/O devices (even the uncommon ones + * such as DMA channels) and bridges (even those that have no device behind + * them) using the pci backend. + * This implies ::HWLOC_TOPOLOGY_FLAG_IO_DEVICES. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_WHOLE_IO = (1UL<<4), + + /** \brief Detect instruction caches. + * + * This flag enables detection of Instruction caches, + * instead of only Data and Unified caches. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL<<5), + + /** \brief Get the set of allowed resources from the local operating system even if the topology was loaded from XML or synthetic description. + * + * If the topology was loaded from XML or from a synthetic string, + * restrict it by applying the current process restrictions such as + * Linux Cgroup/Cpuset. + * + * This is useful when the topology is not loaded directly from + * the local machine (e.g. for performance reason) and it comes + * with all resources, while the running process is restricted + * to only parts of the machine. + * + * This flag is ignored unless ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM is + * also set since the loaded topology must match the underlying machine + * where restrictions will be gathered from. + * + * Setting the environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES + * would result in the same behavior. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<6) +}; + +/** \brief Set OR'ed flags to non-yet-loaded topology. + * + * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded. + * + * If this function is called multiple times, the last invokation will erase + * and replace the set of flags that was previously set. + * + * The flags set in a topology may be retrieved with hwloc_topology_get_flags() + */ +HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags); + +/** \brief Get OR'ed flags of a topology. + * + * Get the OR'ed set of ::hwloc_topology_flags_e of a topology. + * + * \return the flags previously set with hwloc_topology_set_flags(). + */ +HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology); + +/** \brief Change which process the topology is viewed from + * + * On some systems, processes may have different views of the machine, for + * instance the set of allowed CPUs. By default, hwloc exposes the view from + * the current process. Calling hwloc_topology_set_pid() permits to make it + * expose the topology of the machine from the point of view of another + * process. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note -1 is returned and errno is set to ENOSYS on platforms that do not + * support this feature. + */ +HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid); + +/** \brief Change the file-system root path when building the topology from sysfs/procfs. + * + * On Linux system, use sysfs and procfs files as if they were mounted on the given + * \p fsroot_path instead of the main file-system root. Setting the environment + * variable HWLOC_FSROOT may also result in this behavior. + * Not using the main file-system root causes hwloc_topology_is_thissystem() + * to return 0. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to ENOSYS on non-Linux and on Linux systems that + * do not support it. + * \return -1 with the appropriate errno if \p fsroot_path cannot be used. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the Linux component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_fsroot(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict fsroot_path); + +/** \brief Enable synthetic topology. + * + * Gather topology information from the given \p description, + * a space-separated string of numbers describing + * the arity of each level. + * Each number may be prefixed with a type and a colon to enforce the type + * of a level. If only some level types are enforced, hwloc will try to + * choose the other types according to usual topologies, but it may fail + * and you may have to specify more level types manually. + * See also the \ref synthetic. + * + * If \p description was properly parsed and describes a valid topology + * configuration, this function returns 0. + * Otherwise -1 is returned and errno is set to EINVAL. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. + * + * \note On success, the synthetic component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description); + +/** \brief Enable XML-file based topology. + * + * Gather topology information from the XML file given at \p xmlpath. + * Setting the environment variable HWLOC_XMLFILE may also result in this behavior. + * This file may have been generated earlier with hwloc_topology_export_xml() + * or lstopo file.xml. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to EINVAL on failure to read the XML file. + * + * \note See also hwloc_topology_set_userdata_import_callback() + * for importing application-specific object userdata. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the XML component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath); + +/** \brief Enable XML based topology using a memory buffer (instead of + * a file, as with hwloc_topology_set_xml()). + * + * Gather topology information from the XML memory buffer given at \p + * buffer and of length \p size. This buffer may have been filled + * earlier with hwloc_topology_export_xmlbuffer(). + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to EINVAL on failure to read the XML buffer. + * + * \note See also hwloc_topology_set_userdata_import_callback() + * for importing application-specific object userdata. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the XML component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); + +/** \brief Prepare the topology for custom assembly. + * + * The topology then contains a single root object. + * It must then be built by inserting other topologies with + * hwloc_custom_insert_topology() or single objects with + * hwloc_custom_insert_group_object_by_parent(). + * hwloc_topology_load() must be called to finalize the new + * topology as usual. + * + * \note If nothing is inserted in the topology, + * hwloc_topology_load() will fail with errno set to EINVAL. + * + * \note The cpuset and nodeset of the root object are NULL because + * these sets are meaningless when assembling multiple topologies. + * + * \note On success, the custom component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_custom(hwloc_topology_t topology); + +/** \brief Provide a distance matrix. + * + * Provide the matrix of distances between a set of objects of the given type. + * \p nbobjs must be at least 2. + * The set may or may not contain all the existing objects of this type. + * The objects are specified by their OS/physical index in the \p os_index + * array. The \p distances matrix follows the same order. + * The distance from object i to object j in the i*nbobjs+j. + * + * A single latency matrix may be defined for each type. + * If another distance matrix already exists for the given type, + * either because the user specified it or because the OS offers it, + * it will be replaced by the given one. + * If \p nbobjs is \c 0, \p os_index is \c NULL and \p distances is \c NULL, + * the existing distance matrix for the given type is removed. + * + * \note Distance matrices are ignored in multi-node topologies. + */ +HWLOC_DECLSPEC int hwloc_topology_set_distance_matrix(hwloc_topology_t __hwloc_restrict topology, + hwloc_obj_type_t type, unsigned nbobjs, + unsigned *os_index, float *distances); + +/** \brief Does the topology context come from this system? + * + * \return 1 if this topology context was built using the system + * running this program. + * \return 0 instead (for instance if using another file-system root, + * a XML topology file, or a synthetic topology). + */ +HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; + +/** \brief Flags describing actual discovery support for this topology. */ +struct hwloc_topology_discovery_support { + /** \brief Detecting the number of PU objects is supported. */ + unsigned char pu; +}; + +/** \brief Flags describing actual PU binding support for this topology. + * + * A flag may be set even if the feature isn't supported in all cases + * (e.g. binding to random sets of non-contiguous objects). + */ +struct hwloc_topology_cpubind_support { + /** Binding the whole current process is supported. */ + unsigned char set_thisproc_cpubind; + /** Getting the binding of the whole current process is supported. */ + unsigned char get_thisproc_cpubind; + /** Binding a whole given process is supported. */ + unsigned char set_proc_cpubind; + /** Getting the binding of a whole given process is supported. */ + unsigned char get_proc_cpubind; + /** Binding the current thread only is supported. */ + unsigned char set_thisthread_cpubind; + /** Getting the binding of the current thread only is supported. */ + unsigned char get_thisthread_cpubind; + /** Binding a given thread only is supported. */ + unsigned char set_thread_cpubind; + /** Getting the binding of a given thread only is supported. */ + unsigned char get_thread_cpubind; + /** Getting the last processors where the whole current process ran is supported */ + unsigned char get_thisproc_last_cpu_location; + /** Getting the last processors where a whole process ran is supported */ + unsigned char get_proc_last_cpu_location; + /** Getting the last processors where the current thread ran is supported */ + unsigned char get_thisthread_last_cpu_location; +}; + +/** \brief Flags describing actual memory binding support for this topology. + * + * A flag may be set even if the feature isn't supported in all cases + * (e.g. binding to random sets of non-contiguous objects). + */ +struct hwloc_topology_membind_support { + /** Binding the whole current process is supported. */ + unsigned char set_thisproc_membind; + /** Getting the binding of the whole current process is supported. */ + unsigned char get_thisproc_membind; + /** Binding a whole given process is supported. */ + unsigned char set_proc_membind; + /** Getting the binding of a whole given process is supported. */ + unsigned char get_proc_membind; + /** Binding the current thread only is supported. */ + unsigned char set_thisthread_membind; + /** Getting the binding of the current thread only is supported. */ + unsigned char get_thisthread_membind; + /** Binding a given memory area is supported. */ + unsigned char set_area_membind; + /** Getting the binding of a given memory area is supported. */ + unsigned char get_area_membind; + /** Allocating a bound memory area is supported. */ + unsigned char alloc_membind; + /** First-touch policy is supported. */ + unsigned char firsttouch_membind; + /** Bind policy is supported. */ + unsigned char bind_membind; + /** Interleave policy is supported. */ + unsigned char interleave_membind; + /** Replication policy is supported. */ + unsigned char replicate_membind; + /** Next-touch migration policy is supported. */ + unsigned char nexttouch_membind; + /** Migration flags is supported. */ + unsigned char migrate_membind; + /** Getting the last NUMA nodes where a memory area was allocated is supported */ + unsigned char get_area_memlocation; +}; + +/** \brief Set of flags describing actual support for this topology. + * + * This is retrieved with hwloc_topology_get_support() and will be valid until + * the topology object is destroyed. Note: the values are correct only after + * discovery. + */ +struct hwloc_topology_support { + struct hwloc_topology_discovery_support *discovery; + struct hwloc_topology_cpubind_support *cpubind; + struct hwloc_topology_membind_support *membind; +}; + +/** \brief Retrieve the topology support. + * + * Each flag indicates whether a feature is supported. + * If set to 0, the feature is not supported. + * If set to 1, the feature is supported, but the corresponding + * call may still fail in some corner cases. + * + * These features are also listed by hwloc-info \--support + */ +HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology); + +/** \brief Set the topology-specific userdata pointer. + * + * Each topology may store one application-given private data pointer. + * It is initialized to \c NULL. + * hwloc will never modify it. + * + * Use it as you wish, after hwloc_topology_init() and until hwloc_topolog_destroy(). + * + * This pointer is not exported to XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const void *userdata); + +/** \brief Retrieve the topology-specific userdata pointer. + * + * Retrieve the application-given private data pointer that was + * previously set with hwloc_topology_set_userdata(). + */ +HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology); + +/** @} */ + + + +/** \defgroup hwlocality_levels Object levels, depths and types + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Get the depth of the hierarchical tree of objects. + * + * This is the depth of ::HWLOC_OBJ_PU objects plus one. + * + * \note I/O and Misc objects are ignored when computing the depth + * of the tree (they are placed on special levels, or none). + */ +HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; + +/** \brief Returns the depth of objects of type \p type. + * + * If no object of this type is present on the underlying architecture, or if + * the OS doesn't provide this kind of information, the function returns + * ::HWLOC_TYPE_DEPTH_UNKNOWN. + * + * If type is absent but a similar type is acceptable, see also + * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth(). + * + * If some objects of the given type exist in different levels, + * for instance L1 and L2 caches, or L1i and L1d caches, + * the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE. + * See hwloc_get_cache_type_depth() in hwloc/helper.h to better handle this + * case. + * + * If an I/O object type is given, the function returns a virtual value + * because I/O objects are stored in special levels that are not CPU-related. + * This virtual depth may be passed to other hwloc functions such as + * hwloc_get_obj_by_depth() but it should not be considered as an actual + * depth by the application. In particular, it should not be compared with + * any other object depth or with the entire topology depth. + * + * If ::HWLOC_OBJ_MISC is given, the function returns ::HWLOC_TYPE_DEPTH_UNKNOWN. + */ +HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type); + +enum hwloc_get_type_depth_e { + HWLOC_TYPE_DEPTH_UNKNOWN = -1, /**< \brief No object of given type exists in the topology. \hideinitializer */ + HWLOC_TYPE_DEPTH_MULTIPLE = -2, /**< \brief Objects of given type exist at different depth in the topology. \hideinitializer */ + HWLOC_TYPE_DEPTH_BRIDGE = -3, /**< \brief Virtual depth for bridge object level. \hideinitializer */ + HWLOC_TYPE_DEPTH_PCI_DEVICE = -4, /**< \brief Virtual depth for PCI device object level. \hideinitializer */ + HWLOC_TYPE_DEPTH_OS_DEVICE = -5 /**< \brief Virtual depth for software device object level. \hideinitializer */ +}; + +/** \brief Returns the depth of objects of type \p type or below + * + * If no object of this type is present on the underlying architecture, the + * function returns the depth of the first "present" object typically found + * inside \p type. + * + * If some objects of the given type exist in different levels, for instance + * L1 and L2 caches, the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE. + */ +static __hwloc_inline int +hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; + +/** \brief Returns the depth of objects of type \p type or above + * + * If no object of this type is present on the underlying architecture, the + * function returns the depth of the first "present" object typically + * containing \p type. + * + * If some objects of the given type exist in different levels, for instance + * L1 and L2 caches, the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE. + */ +static __hwloc_inline int +hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; + +/** \brief Returns the type of objects at depth \p depth. + * + * \p depth should between 0 and hwloc_topology_get_depth()-1. + * + * \return -1 if depth \p depth does not exist. + */ +HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure; + +/** \brief Returns the width of level at depth \p depth. + */ +HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure; + +/** \brief Returns the width of level type \p type + * + * If no object for that type exists, 0 is returned. + * If there are several levels with objects of that type, -1 is returned. + */ +static __hwloc_inline int +hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; + +/** \brief Returns the top-object of the topology-tree. + * + * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different + * for complex topologies. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Returns the topology object at logical index \p idx from depth \p depth */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, unsigned depth, unsigned idx) __hwloc_attribute_pure; + +/** \brief Returns the topology object at logical index \p idx with type \p type + * + * If no object for that type exists, \c NULL is returned. + * If there are several levels with objects of that type, \c NULL is returned + * and ther caller may fallback to hwloc_get_obj_by_depth(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; + +/** \brief Returns the next object at depth \p depth. + * + * If \p prev is \c NULL, return the first object at depth \p depth. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev); + +/** \brief Returns the next object of type \p type. + * + * If \p prev is \c NULL, return the first object at type \p type. If + * there are multiple or no depth for given type, return \c NULL and + * let the caller fallback to hwloc_get_next_obj_by_depth(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, + hwloc_obj_t prev); + +/** @} */ + + + +/** \defgroup hwlocality_object_strings Converting between Object Types, Sets and Attributes, and Strings + * @{ + */ + +/** \brief Return a constant stringified object type. + * + * This function is the basic way to convert a generic type into a string. + * + * hwloc_obj_type_snprintf() may return a more precise output for a specific + * object, but it requires the caller to provide the output buffer. + */ +HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const; + +/** \brief Stringify the type of a given topology object into a human-readable form. + * + * Contrary to hwloc_obj_type_string(), this function includes object-specific + * attributes (such as the Group depth, the Bridge type, or OS device type) + * in the output, and it requires the caller to provide the output buffer. + * + * The output is guaranteed to be the same for all objects of a same topology level. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, + int verbose); + +/** \brief Stringify the attributes of a given topology object into a human-readable form. + * + * Attribute values are separated by \p separator. + * + * Only the major attributes are printed in non-verbose mode. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * __hwloc_restrict separator, + int verbose); + +/** \brief Stringify the cpuset containing a set of objects. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_cpuset_snprintf(char * __hwloc_restrict str, size_t size, size_t nobj, const hwloc_obj_t * __hwloc_restrict objs); + +/** \brief Return an object type and attributes from a type string. + * + * Convert strings such as "Package" or "Cache" into the corresponding types. + * Matching is case-insensitive, and only the first letters are actually + * required to match. + * + * This function is guaranteed to match any string returned by hwloc_obj_type_string() + * or hwloc_obj_type_snprintf(). + * + * Types that have specific attributes, for instance caches and groups, + * may be returned in \p depthattrp and \p typeattrp. They are ignored + * when these pointers are \c NULL. + * + * For instance "L2i" or "L2iCache" would return + * type HWLOC_OBJ_CACHE in \p typep, 2 in \p depthattrp, + * and HWLOC_OBJ_CACHE_TYPE_INSTRUCTION in \p typeattrp + * (this last pointer should point to a hwloc_obj_cache_type_t). + * "Group3" would return type HWLOC_OBJ_GROUP type and 3 in \p depthattrp. + * Attributes that are not specified in the string (for instance "Group" + * without a depth, or "L2Cache" without a cache type) are set to -1. + * + * \p typeattrp is only filled if the size specified in \p typeattrsize + * is large enough. It is currently only used for caches, and the required + * size is at least the size of hwloc_obj_cache_type_t. + * + * \return 0 if a type was correctly identified, otherwise -1. + * + * \note This is an extended version of the now deprecated hwloc_obj_type_of_string() + */ +HWLOC_DECLSPEC int hwloc_obj_type_sscanf(const char *string, + hwloc_obj_type_t *typep, + int *depthattrp, + void *typeattrp, size_t typeattrsize); + +/** @} */ + + + +/** \defgroup hwlocality_info_attr Consulting and Adding Key-Value Info Attributes + * + * @{ + */ + +/** \brief Search the given key name in object infos and return the corresponding value. + * + * If multiple keys match the given name, only the first one is returned. + * + * \return \c NULL if no such key exists. + */ +static __hwloc_inline const char * +hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure; + +/** \brief Add the given info name and value pair to the given object. + * + * The info is appended to the existing info array even if another key + * with the same name already exists. + * + * The input strings are copied before being added in the object infos. + * + * \note This function may be used to enforce object colors in the lstopo + * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb" + * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details. + * + * \note If \p value contains some non-printable characters, they will + * be dropped when exporting to XML, see hwloc_topology_export_xml(). + */ +HWLOC_DECLSPEC void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value); + +/** @} */ + + + +/** \defgroup hwlocality_cpubinding CPU binding + * + * It is often useful to call hwloc_bitmap_singlify() first so that a single CPU + * remains in the set. This way, the process will not even migrate between + * different CPUs inside the given set. + * Some operating systems also only support that kind of binding. + * + * Some operating systems do not provide all hwloc-supported + * mechanisms to bind processes, threads, etc. + * hwloc_topology_get_support() may be used to query about the actual CPU + * binding support in the currently used operating system. + * + * When the requested binding operation is not available and the + * ::HWLOC_CPUBIND_STRICT flag was passed, the function returns -1. + * \p errno is set to \c ENOSYS when it is not possible to bind the requested kind of object + * processes/threads. errno is set to \c EXDEV when the requested cpuset + * can not be enforced (e.g. some systems only allow one CPU, and some + * other systems only allow one NUMA node). + * + * If ::HWLOC_CPUBIND_STRICT was not passed, the function may fail as well, + * or the operating system may use a slightly different operation + * (with side-effects, smaller binding set, etc.) + * when the requested operation is not exactly supported. + * + * The most portable version that should be preferred over the others, + * whenever possible, is the following one which just binds the current program, + * assuming it is single-threaded: + * + * \code + * hwloc_set_cpubind(topology, set, 0), + * \endcode + * + * If the program may be multithreaded, the following one should be preferred + * to only bind the current thread: + * + * \code + * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD), + * \endcode + * + * \sa Some example codes are available under doc/examples/ in the source tree. + * + * \note To unbind, just call the binding function with either a full cpuset or + * a cpuset equal to the system cpuset. + * + * \note On some operating systems, CPU binding may have effects on memory binding, see + * ::HWLOC_CPUBIND_NOMEMBIND + * + * \note Running lstopo \--top or hwloc-ps can be a very convenient tool to check + * how binding actually happened. + * @{ + */ + +/** \brief Process/Thread binding flags. + * + * These bit flags can be used to refine the binding policy. + * + * The default (0) is to bind the current process, assumed to be + * single-threaded, in a non-strict way. This is the most portable + * way to bind as all operating systems usually provide it. + * + * \note Not all systems support all kinds of binding. See the + * "Detailed Description" section of \ref hwlocality_cpubinding for a + * description of errors that can occur. + */ +typedef enum { + /** \brief Bind all threads of the current (possibly) multithreaded process. + * \hideinitializer */ + HWLOC_CPUBIND_PROCESS = (1<<0), + + /** \brief Bind current thread of current process. + * \hideinitializer */ + HWLOC_CPUBIND_THREAD = (1<<1), + + /** \brief Request for strict binding from the OS. + * + * By default, when the designated CPUs are all busy while other + * CPUs are idle, operating systems may execute the thread/process + * on those other CPUs instead of the designated CPUs, to let them + * progress anyway. Strict binding means that the thread/process + * will _never_ execute on other cpus than the designated CPUs, even + * when those are busy with other tasks and other CPUs are idle. + * + * \note Depending on the operating system, strict binding may not + * be possible (e.g., the OS does not implement it) or not allowed + * (e.g., for an administrative reasons), and the function will fail + * in that case. + * + * When retrieving the binding of a process, this flag checks + * whether all its threads actually have the same binding. If the + * flag is not given, the binding of each thread will be + * accumulated. + * + * \note This flag is meaningless when retrieving the binding of a + * thread. + * \hideinitializer + */ + HWLOC_CPUBIND_STRICT = (1<<2), + + /** \brief Avoid any effect on memory binding + * + * On some operating systems, some CPU binding function would also + * bind the memory on the corresponding NUMA node. It is often not + * a problem for the application, but if it is, setting this flag + * will make hwloc avoid using OS functions that would also bind + * memory. This will however reduce the support of CPU bindings, + * i.e. potentially return -1 with errno set to ENOSYS in some + * cases. + * + * This flag is only meaningful when used with functions that set + * the CPU binding. It is ignored when used with functions that get + * CPU binding information. + * \hideinitializer + */ + HWLOC_CPUBIND_NOMEMBIND = (1<<3) +} hwloc_cpubind_flags_t; + +/** \brief Bind current process or thread on cpus given in physical bitmap \p set. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); + +/** \brief Get current process or thread binding. + * + * Writes into \p set the physical cpuset which the process or thread (according to \e + * flags) was last bound to. + */ +HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); + +/** \brief Bind a process \p pid on cpus given in physical bitmap \p set. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the binding is applied to that specific thread. + * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags); + +/** \brief Get the current physical binding of process \p pid. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the binding for that specific thread is returned. + * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); + +#ifdef hwloc_thread_t +/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set. + * + * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags); +#endif + +#ifdef hwloc_thread_t +/** \brief Get the current physical binding of thread \p tid. + * + * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags); +#endif + +/** \brief Get the last physical CPU where the current process or thread ran. + * + * The operating system may move some tasks from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated. + * + * \p flags can include either ::HWLOC_CPUBIND_PROCESS or ::HWLOC_CPUBIND_THREAD to + * specify whether the query should be for the whole process (union of all CPUs + * on which all threads are running), or only the current thread. If the + * process is single-threaded, flags can be set to zero to let hwloc use + * whichever method is available on the underlying OS. + */ +HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); + +/** \brief Get the last physical CPU where a process ran. + * + * The operating system may move some tasks from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the last CPU location of that specific thread is returned. + * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); + +/** @} */ + + + +/** \defgroup hwlocality_membinding Memory binding + * + * Memory binding can be done three ways: + * + * - explicit memory allocation thanks to hwloc_alloc_membind() and friends: + * the binding will have effect on the memory allocated by these functions. + * - implicit memory binding through binding policy: hwloc_set_membind() and + * friends only define the current policy of the process, which will be + * applied to the subsequent calls to malloc() and friends. + * - migration of existing memory ranges, thanks to hwloc_set_area_membind() + * and friends, which move already-allocated data. + * + * Not all operating systems support all three ways. + * hwloc_topology_get_support() may be used to query about the actual memory + * binding support in the currently used operating system. + * + * When the requested binding operation is not available and the + * ::HWLOC_MEMBIND_STRICT flag was passed, the function returns -1. + * \p errno will be set to \c ENOSYS when the system does support + * the specified action or policy + * (e.g., some systems only allow binding memory on a per-thread + * basis, whereas other systems only allow binding memory for all + * threads in a process). + * \p errno will be set to EXDEV when the requested set can not be enforced + * (e.g., some systems only allow binding memory to a single NUMA node). + * + * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well, + * or the operating system may use a slightly different operation + * (with side-effects, smaller binding set, etc.) + * when the requested operation is not exactly supported. + * + * The most portable form that should be preferred over the others + * whenever possible is as follows. + * It allocates some memory hopefully bound to the specified set. + * To do so, hwloc will possibly have to change the current memory + * binding policy in order to actually get the memory bound, if the OS + * does not provide any other way to simply allocate bound memory + * without changing the policy for all allocations. That is the + * difference with hwloc_alloc_membind(), which will never change the + * current memory binding policy. + * + * \code + * hwloc_alloc_membind_policy(topology, size, set, + * HWLOC_MEMBIND_BIND, 0); + * \endcode + * + * Each hwloc memory binding function is available in two forms: one + * that takes a bitmap argument (a CPU set by default, or a NUMA memory + * node set if the flag ::HWLOC_MEMBIND_BYNODESET is specified), + * and another one (whose name ends with _nodeset) that always takes + * a NUMA memory node set. + * See \ref hwlocality_object_sets and \ref hwlocality_bitmap for a + * discussion of CPU sets and NUMA memory node sets. + * It is also possible to convert between CPU set and node set using + * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset(). + * + * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes. + * Binding by nodeset should therefore be preferred whenever possible. + * + * \sa Some example codes are available under doc/examples/ in the source tree. + * + * \note On some operating systems, memory binding affects the CPU + * binding; see ::HWLOC_MEMBIND_NOCPUBIND + * @{ + */ + +/** \brief Memory binding policy. + * + * These constants can be used to choose the binding policy. Only one policy can + * be used at a time (i.e., the values cannot be OR'ed together). + * + * Not all systems support all kinds of binding. + * hwloc_topology_get_support() may be used to query about the actual memory + * binding policy support in the currently used operating system. + * See the "Detailed Description" section of \ref hwlocality_membinding + * for a description of errors that can occur. + */ +typedef enum { + /** \brief Reset the memory allocation policy to the system default. + * Depending on the operating system, this may correspond to + * ::HWLOC_MEMBIND_FIRSTTOUCH (Linux), + * or ::HWLOC_MEMBIND_BIND (AIX, HP-UX, OSF, Solaris, Windows). + * This policy is never returned by get membind functions when running + * on normal machines. + * It is only returned when binding hooks are empty because the topology + * was loaded from XML, or HWLOC_THISSYSTEM=0, etc. + * \hideinitializer */ + HWLOC_MEMBIND_DEFAULT = 0, + + /** \brief Allocate memory + * but do not immediately bind it to a specific locality. Instead, + * each page in the allocation is bound only when it is first + * touched. Pages are individually bound to the local NUMA node of + * the first thread that touches it. If there is not enough memory + * on the node, allocation may be done in the specified nodes + * before allocating on other nodes. + * \hideinitializer */ + HWLOC_MEMBIND_FIRSTTOUCH = 1, + + /** \brief Allocate memory on the specified nodes. + * \hideinitializer */ + HWLOC_MEMBIND_BIND = 2, + + /** \brief Allocate memory on the given nodes in an interleaved + * / round-robin manner. The precise layout of the memory across + * multiple NUMA nodes is OS/system specific. Interleaving can be + * useful when threads distributed across the specified NUMA nodes + * will all be accessing the whole memory range concurrently, since + * the interleave will then balance the memory references. + * \hideinitializer */ + HWLOC_MEMBIND_INTERLEAVE = 3, + + /** \brief Replicate memory on the given nodes; reads from this + * memory will attempt to be serviced from the NUMA node local to + * the reading thread. Replicating can be useful when multiple + * threads from the specified NUMA nodes will be sharing the same + * read-only data. + * + * This policy can only be used with existing memory allocations + * (i.e., the hwloc_set_*membind*() functions); it cannot be used + * with functions that allocate new memory (i.e., the hwloc_alloc*() + * functions). + * \hideinitializer */ + HWLOC_MEMBIND_REPLICATE = 4, + + /** \brief For each page bound with this policy, by next time + * it is touched (and next time only), it is moved from its current + * location to the local NUMA node of the thread where the memory + * reference occurred (if it needs to be moved at all). + * \hideinitializer */ + HWLOC_MEMBIND_NEXTTOUCH = 5, + + /** \brief Returned by get_membind() functions when multiple + * threads or parts of a memory area have differing memory binding + * policies. + * \hideinitializer */ + HWLOC_MEMBIND_MIXED = -1 +} hwloc_membind_policy_t; + +/** \brief Memory binding flags. + * + * These flags can be used to refine the binding policy. + * All flags can be logically OR'ed together with the exception of + * ::HWLOC_MEMBIND_PROCESS and ::HWLOC_MEMBIND_THREAD; + * these two flags are mutually exclusive. + * + * Not all systems support all kinds of binding. + * hwloc_topology_get_support() may be used to query about the actual memory + * binding support in the currently used operating system. + * See the "Detailed Description" section of \ref hwlocality_membinding + * for a description of errors that can occur. + */ +typedef enum { + /** \brief Set policy for all threads of the specified (possibly + * multithreaded) process. This flag is mutually exclusive with + * ::HWLOC_MEMBIND_THREAD. + * \hideinitializer */ + HWLOC_MEMBIND_PROCESS = (1<<0), + + /** \brief Set policy for a specific thread of the current process. + * This flag is mutually exclusive with ::HWLOC_MEMBIND_PROCESS. + * \hideinitializer */ + HWLOC_MEMBIND_THREAD = (1<<1), + + /** Request strict binding from the OS. The function will fail if + * the binding can not be guaranteed / completely enforced. + * + * This flag has slightly different meanings depending on which + * function it is used with. + * \hideinitializer */ + HWLOC_MEMBIND_STRICT = (1<<2), + + /** \brief Migrate existing allocated memory. If the memory cannot + * be migrated and the ::HWLOC_MEMBIND_STRICT flag is passed, an error + * will be returned. + * \hideinitializer */ + HWLOC_MEMBIND_MIGRATE = (1<<3), + + /** \brief Avoid any effect on CPU binding. + * + * On some operating systems, some underlying memory binding + * functions also bind the application to the corresponding CPU(s). + * Using this flag will cause hwloc to avoid using OS functions that + * could potentially affect CPU bindings. Note, however, that using + * NOCPUBIND may reduce hwloc's overall memory binding + * support. Specifically: some of hwloc's memory binding functions + * may fail with errno set to ENOSYS when used with NOCPUBIND. + * \hideinitializer + */ + HWLOC_MEMBIND_NOCPUBIND = (1<<4), + + /** \brief Consider the bitmap argument as a nodeset. + * + * Functions whose name ends with _nodeset() take a nodeset argument. + * Other functions take a bitmap argument that is considered a nodeset + * if this flag is given, or a cpuset otherwise. + * + * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes. + * Binding by nodeset should therefore be preferred whenever possible. + * \hideinitializer + */ + HWLOC_MEMBIND_BYNODESET = (1<<5) +} hwloc_membind_flags_t; + +/** \brief Set the default memory binding policy of the current + * process or thread to prefer the NUMA node(s) specified by \p nodeset + * + * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is + * specified, the current process is assumed to be single-threaded. + * This is the most portable form as it permits hwloc to use either + * process-based OS functions or thread-based OS functions, depending + * on which are available. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + +/** \brief Set the default memory binding policy of the current + * process or thread to prefer the NUMA node(s) specified by \p set + * + * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is + * specified, the current process is assumed to be single-threaded. + * This is the most portable form as it permits hwloc to use either + * process-based OS functions or thread-based OS functions, depending + * on which are available. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * current process or thread. + * + * This function has two output parameters: \p nodeset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that + * the query target is the current policy and nodeset for only the + * thread invoking this function. + * + * If neither of these flags are passed (which is the most portable + * method), the process is assumed to be single threaded. This allows + * hwloc to use either process-based OS functions or thread-based OS + * functions, depending on which are available. + * + * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS + * is also specified. In this case, hwloc will check the default + * memory policies and nodesets for all threads in the process. If + * they are not identical, -1 is returned and errno is set to EXDEV. + * If they are identical, the values are returned in \p nodeset and \p + * policy. + * + * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and + * ::HWLOC_MEMBIND_STRICT is \em not specified), \p nodeset is set to + * the logical OR of all threads' default nodeset. + * If all threads' default policies are the same, \p policy is set to + * that policy. If they are different, \p policy is set to + * ::HWLOC_MEMBIND_MIXED. + * + * In the ::HWLOC_MEMBIND_THREAD case (or when neither + * ::HWLOC_MEMBIND_PROCESS or ::HWLOC_MEMBIND_THREAD is specified), there + * is only one nodeset and policy; they are returned in \p nodeset and + * \p policy, respectively. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * current process or thread. + * + * This function has two output parameters: \p set and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that + * the query target is the current policy and nodeset for only the + * thread invoking this function. + * + * If neither of these flags are passed (which is the most portable + * method), the process is assumed to be single threaded. This allows + * hwloc to use either process-based OS functions or thread-based OS + * functions, depending on which are available. + * + * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS + * is also specified. In this case, hwloc will check the default + * memory policies and nodesets for all threads in the process. If + * they are not identical, -1 is returned and errno is set to EXDEV. + * If they are identical, the values are returned in \p set and \p + * policy. + * + * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and + * ::HWLOC_MEMBIND_STRICT is \em not specified), the default set + * from each thread is logically OR'ed together. + * If all threads' default policies are the same, \p policy is set to + * that policy. If they are different, \p policy is set to + * ::HWLOC_MEMBIND_MIXED. + * + * In the ::HWLOC_MEMBIND_THREAD case (or when neither + * ::HWLOC_MEMBIND_PROCESS or ::HWLOC_MEMBIND_THREAD is specified), there + * is only one set and policy; they are returned in \p set and + * \p policy, respectively. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); + +/** \brief Set the default memory binding policy of the specified + * process to prefer the NUMA node(s) specified by \p nodeset + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + +/** \brief Set the default memory binding policy of the specified + * process to prefer the NUMA node(s) specified by \p set + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * specified process. + * + * This function has two output parameters: \p nodeset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified + * (which is the most portable method), the process is assumed to be + * single threaded. This allows hwloc to use either process-based OS + * functions or thread-based OS functions, depending on which are + * available. + * + * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to + * this function. + * + * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default + * memory policies and nodesets for all threads in the specified + * process. If they are not identical, -1 is returned and errno is + * set to EXDEV. If they are identical, the values are returned in \p + * nodeset and \p policy. + * + * Otherwise, \p nodeset is set to the logical OR of all threads' + * default nodeset. If all threads' default policies are the same, \p + * policy is set to that policy. If they are different, \p policy is + * set to ::HWLOC_MEMBIND_MIXED. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * specified process. + * + * This function has two output parameters: \p set and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified + * (which is the most portable method), the process is assumed to be + * single threaded. This allows hwloc to use either process-based OS + * functions or thread-based OS functions, depending on which are + * available. + * + * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to + * this function. + * + * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default + * memory policies and nodesets for all threads in the specified + * process. If they are not identical, -1 is returned and errno is + * set to EXDEV. If they are identical, the values are returned in \p + * set and \p policy. + * + * Otherwise, \p set is set to the logical OR of all threads' + * default set. If all threads' default policies + * are the same, \p policy is set to that policy. If they are + * different, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); + +/** \brief Bind the already-allocated memory identified by (addr, len) + * to the NUMA node(s) specified by \p nodeset. + * + * \return 0 if \p len is 0. + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + +/** \brief Bind the already-allocated memory identified by (addr, len) + * to the NUMA node(s) specified by \p set. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * \return 0 if \p len is 0. + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the physical NUMA node(s) and binding policy of the memory + * identified by (\p addr, \p len ). + * + * This function has two output parameters: \p nodeset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the memory binding policies and nodesets of the pages + * in the address range. + * + * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first + * checked to see if they all have the same memory binding policy and + * nodeset. If they do not, -1 is returned and errno is set to EXDEV. + * If they are identical across all pages, the nodeset and policy are + * returned in \p nodeset and \p policy, respectively. + * + * If ::HWLOC_MEMBIND_STRICT is not specified, \p nodeset is set to the + * union of all NUMA node(s) containing pages in the address range. + * If all pages in the target have the same policy, it is returned in + * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If \p len is 0, -1 is returned and errno is set to EINVAL. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of + * the memory identified by (\p addr, \p len ). + * + * This function has two output parameters: \p set and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the memory binding policies and nodesets of the pages + * in the address range. + * + * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first + * checked to see if they all have the same memory binding policy and + * nodeset. If they do not, -1 is returned and errno is set to EXDEV. + * If they are identical across all pages, the set and policy are + * returned in \p set and \p policy, respectively. + * + * If ::HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA + * node(s) containing pages in the address range is calculated. + * If all pages in the target have the same policy, it is returned in + * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * If \p len is 0, -1 is returned and errno is set to EINVAL. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); + +/** \brief Get the NUMA nodes where memory identified by (\p addr, \p len ) is physically allocated. + * + * Fills \p set according to the NUMA nodes where the memory area pages + * are physically allocated. If no page is actually allocated yet, + * \p set may be empty. + * + * If pages spread to multiple nodes, it is not specified whether they spread + * equitably, or whether most of them are on a single node, etc. + * + * The operating system may move memory pages from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified in \p flags, set is + * considered a nodeset. Otherwise it's a cpuset. + * + * If \p len is 0, \p set is emptied. + */ +HWLOC_DECLSPEC int hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, int flags); + +/** \brief Allocate some memory + * + * This is equivalent to malloc(), except that it tries to allocate + * page-aligned memory from the OS. + * + * \note The allocated memory should be freed with hwloc_free(). + */ +HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len); + +/** \brief Allocate some memory on NUMA memory nodes specified by \p nodeset + * + * \return NULL with errno set to ENOSYS if the action is not supported + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to EXDEV if the binding cannot be enforced + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to ENOMEM if the memory allocation failed + * even before trying to bind. + * + * \note The allocated memory should be freed with hwloc_free(). + */ +HWLOC_DECLSPEC void *hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Allocate some memory on NUMA memory nodes specified by \p set + * + * \return NULL with errno set to ENOSYS if the action is not supported + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to EXDEV if the binding cannot be enforced + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to ENOMEM if the memory allocation failed + * even before trying to bind. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * \note The allocated memory should be freed with hwloc_free(). + */ +HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Allocate some memory on NUMA memory nodes specified by \p nodeset + * + * This is similar to hwloc_alloc_membind() except that it is allowed to change + * the current memory binding policy, thus providing more binding support, at + * the expense of changing the current state. + */ +static __hwloc_inline void * +hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Allocate some memory on NUMA memory nodes specified by \p set + * + * This is similar to hwloc_alloc_membind_nodeset() except that it is allowed to change + * the current memory binding policy, thus providing more binding support, at + * the expense of changing the current state. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + */ +static __hwloc_inline void * +hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Free memory that was previously allocated by hwloc_alloc() + * or hwloc_alloc_membind(). + */ +HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len); + +/** @} */ + + + +/** \defgroup hwlocality_tinker Modifying a loaded Topology + * @{ + */ + +/** \brief Add a MISC object to the topology + * + * A new MISC object will be created and inserted into the topology at the + * position given by bitmap \p cpuset. This offers a way to add new + * intermediate levels to the topology hierarchy. + * + * \p cpuset and \p name will be copied to setup the new object attributes. + * + * \return the newly-created object. + * \return \c NULL if the insertion conflicts with the existing topology tree. + * + * \note If \p name contains some non-printable characters, they will + * be dropped when exporting to XML, see hwloc_topology_export_xml(). + */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, const char *name); + +/** \brief Add a MISC object as a leaf of the topology + * + * A new MISC object will be created and inserted into the topology at the + * position given by parent. It is appended to the list of existing children, + * without ever adding any intermediate hierarchy level. This is useful for + * annotating the topology without actually changing the hierarchy. + * + * \p name will be copied to the setup the new object attributes. + * However, the new leaf object will not have any \p cpuset. + * + * \return the newly-created object + * + * \note If \p name contains some non-printable characters, they will + * be dropped when exporting to XML, see hwloc_topology_export_xml(). + */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name); + +/** \brief Flags to be given to hwloc_topology_restrict(). */ +enum hwloc_restrict_flags_e { + /** \brief Adapt distance matrices according to objects being removed during restriction. + * If this flag is not set, distance matrices are removed. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES = (1<<0), + + /** \brief Move Misc objects to ancestors if their parents are removed during restriction. + * If this flag is not set, Misc objects are removed when their parents are removed. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_ADAPT_MISC = (1<<1), + + /** \brief Move I/O objects to ancestors if their parents are removed during restriction. + * If this flag is not set, I/O devices and bridges are removed when their parents are removed. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_ADAPT_IO = (1<<2) +}; + +/** \brief Restrict the topology to the given CPU set. + * + * Topology \p topology is modified so as to remove all objects that + * are not included (or partially included) in the CPU set \p cpuset. + * All objects CPU and node sets are restricted accordingly. + * + * \p flags is a OR'ed set of ::hwloc_restrict_flags_e. + * + * \note This call may not be reverted by restricting back to a larger + * cpuset. Once dropped during restriction, objects may not be brought + * back, except by loading another topology with hwloc_topology_load(). + * + * \return 0 on success. + * + * \return -1 with errno set to EINVAL if the input cpuset is invalid. + * The topology is not modified in this case. + * + * \return -1 with errno set to ENOMEM on failure to allocate internal data. + * The topology is reinitialized in this case. It should be either + * destroyed with hwloc_topology_destroy() or configured and loaded again. + */ +HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags); + +/** @} */ + + + +/** \defgroup hwlocality_custom Building Custom Topologies + * + * A custom topology may be initialized by calling hwloc_topology_set_custom() + * after hwloc_topology_init(). It may then be modified by inserting objects + * or entire topologies. Once done assembling, hwloc_topology_load() should + * be invoked as usual to finalize the topology. + * @{ + */ + +/** \brief Insert an existing topology inside a custom topology + * + * Duplicate the existing topology \p oldtopology inside a new + * custom topology \p newtopology as a leaf of object \p newparent. + * + * If \p oldroot is not \c NULL, duplicate \p oldroot and all its + * children instead of the entire \p oldtopology. Passing the root + * object of \p oldtopology in \p oldroot is equivalent to passing + * \c NULL. + * + * The custom topology \p newtopology must have been prepared with + * hwloc_topology_set_custom() and not loaded with hwloc_topology_load() + * yet. + * + * \p newparent may be either the root of \p newtopology or an object + * that was added through hwloc_custom_insert_group_object_by_parent(). + * + * \note The cpuset and nodeset of the \p newparent object are not + * modified based on the contents of \p oldtopology. + */ +HWLOC_DECLSPEC int hwloc_custom_insert_topology(hwloc_topology_t newtopology, hwloc_obj_t newparent, hwloc_topology_t oldtopology, hwloc_obj_t oldroot); + +/** \brief Insert a new group object inside a custom topology + * + * An object with type ::HWLOC_OBJ_GROUP is inserted as a new child + * of object \p parent. + * + * \p groupdepth is the depth attribute to be given to the new object. + * It may for instance be 0 for top-level groups, 1 for their children, + * and so on. + * + * The custom topology \p newtopology must have been prepared with + * hwloc_topology_set_custom() and not loaded with hwloc_topology_load() + * yet. + * + * \p parent may be either the root of \p topology or an object that + * was added earlier through hwloc_custom_insert_group_object_by_parent(). + * + * \note The cpuset and nodeset of the new group object are NULL because + * these sets are meaningless when assembling multiple topologies. + * + * \note The cpuset and nodeset of the \p parent object are not modified. + */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_custom_insert_group_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, int groupdepth); + +/** @} */ + + + +/** \defgroup hwlocality_xmlexport Exporting Topologies to XML + * @{ + */ + +/** \brief Export the topology into an XML file. + * + * This file may be loaded later through hwloc_topology_set_xml(). + * + * \return -1 if a failure occured. + * + * \note See also hwloc_topology_set_userdata_export_callback() + * for exporting application-specific object userdata. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + * + * \note Only printable characters may be exported to XML string attributes. + * Any other character, especially any non-ASCII character, will be silently + * dropped. + * + * \note If \p name is "-", the XML output is sent to the standard output. + */ +HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath); + +/** \brief Export the topology into a newly-allocated XML memory buffer. + * + * \p xmlbuffer is allocated by the callee and should be freed with + * hwloc_free_xmlbuffer() later in the caller. + * + * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer(). + * + * \return -1 if a failure occured. + * + * \note See also hwloc_topology_set_userdata_export_callback() + * for exporting application-specific object userdata. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + * + * \note Only printable characters may be exported to XML string attributes. + * Any other character, especially any non-ASCII character, will be silently + * dropped. + */ +HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen); + +/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */ +HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer); + +/** \brief Set the application-specific callback for exporting object userdata + * + * The object userdata pointer is not exported to XML by default because hwloc + * does not know what it contains. + * + * This function lets applications set \p export_cb to a callback function + * that converts this opaque userdata into an exportable string. + * + * \p export_cb is invoked during XML export for each object whose + * \p userdata pointer is not \c NULL. + * The callback should use hwloc_export_obj_userdata() or + * hwloc_export_obj_userdata_base64() to actually export + * something to XML (possibly multiple times per object). + * + * \p export_cb may be set to \c NULL if userdata should not be exported to XML. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology, + void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj)); + +/** \brief Export some object userdata to XML + * + * This function may only be called from within the export() callback passed + * to hwloc_topology_set_userdata_export_callback(). + * It may be invoked one of multiple times to export some userdata to XML. + * The \p buffer content of length \p length is stored with optional name + * \p name. + * + * When importing this XML file, the import() callback (if set) will be + * called exactly as many times as hwloc_export_obj_userdata() was called + * during export(). It will receive the corresponding \p name, \p buffer + * and \p length arguments. + * + * \p reserved, \p topology and \p obj must be the first three parameters + * that were given to the export callback. + * + * Only printable characters may be exported to XML string attributes. + * If a non-printable character is passed in \p name or \p buffer, + * the function returns -1 with errno set to EINVAL. + * + * If exporting binary data, the application should first encode into + * printable characters only (or use hwloc_export_obj_userdata_base64()). + * It should also take care of portability issues if the export may + * be reimported on a different architecture. + */ +HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); + +/** \brief Encode and export some object userdata to XML + * + * This function is similar to hwloc_export_obj_userdata() but it encodes + * the input buffer into printable characters before exporting. + * On import, decoding is automatically performed before the data is given + * to the import() callback if any. + * + * This function may only be called from within the export() callback passed + * to hwloc_topology_set_userdata_export_callback(). + * + * The function does not take care of portability issues if the export + * may be reimported on a different architecture. + */ +HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); + +/** \brief Set the application-specific callback for importing userdata + * + * On XML import, userdata is ignored by default because hwloc does not know + * how to store it in memory. + * + * This function lets applications set \p import_cb to a callback function + * that will get the XML-stored userdata and store it in the object as expected + * by the application. + * + * \p import_cb is called during hwloc_topology_load() as many times as + * hwloc_export_obj_userdata() was called during export. The topology + * is not entirely setup yet. Object attributes are ready to consult, + * but links between objects are not. + * + * \p import_cb may be \c NULL if userdata should be ignored during import. + * + * \note \p buffer contains \p length characters followed by a null byte ('\0'). + * + * \note This function should be called before hwloc_topology_load(). + * + * \note The topology-specific userdata pointer is ignored when importing from XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology, + void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length)); + +/** @} */ + + +/** \defgroup hwlocality_syntheticexport Exporting Topologies to Synthetic + * @{ + */ + +/** \brief Flags for exporting synthetic topologies. + * + * Flags to be given as a OR'ed set to hwloc_topology_export_synthetic(). + */ +enum hwloc_topology_export_synthetic_flags_e { + /** \brief Export extended types such as L2dcache as basic types such as Cache. + * + * This is required if loading the synthetic description with hwloc < 1.9. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES = (1UL<<0), + + /** \brief Do not export level attributes. + * + * Ignore level attributes such as memory/cache sizes or PU indexes. + * This is required if loading the synthetic description with hwloc < 1.10. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS = (1UL<<1) +}; + +/** \brief Export the topology as a synthetic string. + * + * At most \p buflen characters will be written in \p buffer, + * including the terminating \0. + * + * This exported string may be given back to hwloc_topology_set_synthetic(). + * + * \p flags is a OR'ed set of hwloc_topology_export_synthetic_flags_e. + * + * \return The number of characters that were written, + * not including the terminating \0. + * + * \return -1 if the topology could not be exported, + * for instance if it is not symmetric. + * + * \note A 1024-byte buffer should be large enough for exporting + * topologies in the vast majority of cases. + */ + HWLOC_DECLSPEC int hwloc_topology_export_synthetic(hwloc_topology_t topology, char *buffer, size_t buflen, unsigned long flags); + +/** @} */ + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +/* high-level helpers */ +#include + +/* inline code of some functions above */ +#include + +/* topology diffs */ +#include + +/* deprecated headers */ +#include + +#endif /* HWLOC_H */ diff --git a/rocrtst/thirdparty/include/hwloc/autogen/config.h b/rocrtst/thirdparty/include/hwloc/autogen/config.h new file mode 100644 index 0000000000..f18e9528a8 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/autogen/config.h @@ -0,0 +1,202 @@ +/* include/hwloc/autogen/config.h. Generated from config.h.in by configure. */ +/* -*- c -*- + * Copyright © 2009 CNRS + * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/* The configuration file */ + +#ifndef HWLOC_CONFIG_H +#define HWLOC_CONFIG_H + +#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)) +# define __hwloc_restrict __restrict +#else +# if __STDC_VERSION__ >= 199901L +# define __hwloc_restrict restrict +# else +# define __hwloc_restrict +# endif +#endif + +/* Note that if we're compiling C++, then just use the "inline" + keyword, since it's part of C++ */ +#if defined(c_plusplus) || defined(__cplusplus) +# define __hwloc_inline inline +#elif defined(_MSC_VER) || defined(__HP_cc) +# define __hwloc_inline __inline +#else +# define __hwloc_inline __inline__ +#endif + +/* + * Note: this is public. We can not assume anything from the compiler used + * by the application and thus the HWLOC_HAVE_* macros below are not + * fetched from the autoconf result here. We only automatically use a few + * well-known easy cases. + */ + +/* Some handy constants to make the logic below a little more readable */ +#if defined(__cplusplus) && \ + (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR >= 4)) +#define GXX_ABOVE_3_4 1 +#else +#define GXX_ABOVE_3_4 0 +#endif + +#if !defined(__cplusplus) && \ + (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)) +#define GCC_ABOVE_2_95 1 +#else +#define GCC_ABOVE_2_95 0 +#endif + +#if !defined(__cplusplus) && \ + (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +#define GCC_ABOVE_2_96 1 +#else +#define GCC_ABOVE_2_96 0 +#endif + +#if !defined(__cplusplus) && \ + (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) +#define GCC_ABOVE_3_3 1 +#else +#define GCC_ABOVE_3_3 0 +#endif + +/* Maybe before gcc 2.95 too */ +#ifdef HWLOC_HAVE_ATTRIBUTE_UNUSED +#define __HWLOC_HAVE_ATTRIBUTE_UNUSED HWLOC_HAVE_ATTRIBUTE_UNUSED +#elif defined(__GNUC__) +# define __HWLOC_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95) +#else +# define __HWLOC_HAVE_ATTRIBUTE_UNUSED 0 +#endif +#if __HWLOC_HAVE_ATTRIBUTE_UNUSED +# define __hwloc_attribute_unused __attribute__((__unused__)) +#else +# define __hwloc_attribute_unused +#endif + +#ifdef HWLOC_HAVE_ATTRIBUTE_MALLOC +#define __HWLOC_HAVE_ATTRIBUTE_MALLOC HWLOC_HAVE_ATTRIBUTE_MALLOC +#elif defined(__GNUC__) +# define __HWLOC_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96) +#else +# define __HWLOC_HAVE_ATTRIBUTE_MALLOC 0 +#endif +#if __HWLOC_HAVE_ATTRIBUTE_MALLOC +# define __hwloc_attribute_malloc __attribute__((__malloc__)) +#else +# define __hwloc_attribute_malloc +#endif + +#ifdef HWLOC_HAVE_ATTRIBUTE_CONST +#define __HWLOC_HAVE_ATTRIBUTE_CONST HWLOC_HAVE_ATTRIBUTE_CONST +#elif defined(__GNUC__) +# define __HWLOC_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95) +#else +# define __HWLOC_HAVE_ATTRIBUTE_CONST 0 +#endif +#if __HWLOC_HAVE_ATTRIBUTE_CONST +# define __hwloc_attribute_const __attribute__((__const__)) +#else +# define __hwloc_attribute_const +#endif + +#ifdef HWLOC_HAVE_ATTRIBUTE_PURE +#define __HWLOC_HAVE_ATTRIBUTE_PURE HWLOC_HAVE_ATTRIBUTE_PURE +#elif defined(__GNUC__) +# define __HWLOC_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96) +#else +# define __HWLOC_HAVE_ATTRIBUTE_PURE 0 +#endif +#if __HWLOC_HAVE_ATTRIBUTE_PURE +# define __hwloc_attribute_pure __attribute__((__pure__)) +#else +# define __hwloc_attribute_pure +#endif + +#ifdef HWLOC_HAVE_ATTRIBUTE_DEPRECATED +#define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED HWLOC_HAVE_ATTRIBUTE_DEPRECATED +#elif defined(__GNUC__) +# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3) +#else +# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED 0 +#endif +#if __HWLOC_HAVE_ATTRIBUTE_DEPRECATED +# define __hwloc_attribute_deprecated __attribute__((__deprecated__)) +#else +# define __hwloc_attribute_deprecated +#endif + +#ifdef HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS +#define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS +#elif defined(__GNUC__) +# define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS (GXX_ABOVE_3_4 || GCC_ABOVE_3_3) +#else +# define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS 0 +#endif +#if __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS +# define __hwloc_attribute_may_alias __attribute__((__may_alias__)) +#else +# define __hwloc_attribute_may_alias +#endif + +#ifdef HWLOC_C_HAVE_VISIBILITY +# if HWLOC_C_HAVE_VISIBILITY +# define HWLOC_DECLSPEC __attribute__((__visibility__("default"))) +# else +# define HWLOC_DECLSPEC +# endif +#else +# define HWLOC_DECLSPEC +#endif + +/* Defined to 1 on Linux */ +#define HWLOC_LINUX_SYS 1 + +/* Defined to 1 if the CPU_SET macro works */ +#define HWLOC_HAVE_CPU_SET 1 + +/* Defined to 1 if you have the `windows.h' header. */ +/* #undef HWLOC_HAVE_WINDOWS_H */ +#define hwloc_pid_t pid_t +#define hwloc_thread_t pthread_t + +#ifdef HWLOC_HAVE_WINDOWS_H + +# include +typedef DWORDLONG hwloc_uint64_t; + +#else /* HWLOC_HAVE_WINDOWS_H */ + +# ifdef hwloc_thread_t +# include +# endif /* hwloc_thread_t */ + +/* Defined to 1 if you have the header file. */ +# define HWLOC_HAVE_STDINT_H 1 + +# include +# ifdef HWLOC_HAVE_STDINT_H +# include +# endif +typedef uint64_t hwloc_uint64_t; + +#endif /* HWLOC_HAVE_WINDOWS_H */ + +/* Whether we need to re-define all the hwloc public symbols or not */ +#define HWLOC_SYM_TRANSFORM 0 + +/* The hwloc symbol prefix */ +#define HWLOC_SYM_PREFIX hwloc_ + +/* The hwloc symbol prefix in all caps */ +#define HWLOC_SYM_PREFIX_CAPS HWLOC_ + +#endif /* HWLOC_CONFIG_H */ diff --git a/rocrtst/thirdparty/include/hwloc/bitmap.h b/rocrtst/thirdparty/include/hwloc/bitmap.h new file mode 100644 index 0000000000..c57ffdbed5 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/bitmap.h @@ -0,0 +1,382 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief The bitmap API, for use in hwloc itself. + */ + +#ifndef HWLOC_BITMAP_H +#define HWLOC_BITMAP_H + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_bitmap The bitmap API + * + * The ::hwloc_bitmap_t type represents a set of objects, typically OS + * processors -- which may actually be hardware threads (represented + * by ::hwloc_cpuset_t, which is a typedef for ::hwloc_bitmap_t) -- or + * memory nodes (represented by ::hwloc_nodeset_t, which is also a + * typedef for ::hwloc_bitmap_t). + * + * Both CPU and node sets are always indexed by OS physical number. + * + * \note CPU sets and nodesets are described in \ref hwlocality_object_sets. + * + * A bitmap may be of infinite size (all bits are set after some point). + * A bitmap may even be full if all bits are set. + * + * \note Several examples of using the bitmap API are available under the + * doc/examples/ directory in the source tree. + * Regression tests such as tests/hwloc/hwloc_bitmap*.c also make intensive use + * of this API. + * @{ + */ + + +/** \brief + * Set of bits represented as an opaque pointer to an internal bitmap. + */ +typedef struct hwloc_bitmap_s * hwloc_bitmap_t; +/** \brief a non-modifiable ::hwloc_bitmap_t */ +typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t; + + +/* + * Bitmap allocation, freeing and copying. + */ + +/** \brief Allocate a new empty bitmap. + * + * \returns A valid bitmap or \c NULL. + * + * The bitmap should be freed by a corresponding call to + * hwloc_bitmap_free(). + */ +HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc; + +/** \brief Allocate a new full bitmap. */ +HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc; + +/** \brief Free bitmap \p bitmap. + * + * If \p bitmap is \c NULL, no operation is performed. + */ +HWLOC_DECLSPEC void hwloc_bitmap_free(hwloc_bitmap_t bitmap); + +/** \brief Duplicate bitmap \p bitmap by allocating a new bitmap and copying \p bitmap contents. + * + * If \p bitmap is \c NULL, \c NULL is returned. + */ +HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_dup(hwloc_const_bitmap_t bitmap) __hwloc_attribute_malloc; + +/** \brief Copy the contents of bitmap \p src into the already allocated bitmap \p dst */ +HWLOC_DECLSPEC void hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src); + + +/* + * Bitmap/String Conversion + */ + +/** \brief Stringify a bitmap. + * + * Up to \p buflen characters may be written in buffer \p buf. + * + * If \p buflen is 0, \p buf may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); + +/** \brief Stringify a bitmap into a newly allocated string. + * + * \return -1 on error. + */ +HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); + +/** \brief Parse a bitmap string and stores it in bitmap \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); + +/** \brief Stringify a bitmap in the list format. + * + * Lists are comma-separated indexes or ranges. + * Ranges are dash separated indexes. + * The last range may not have an ending indexes if the bitmap is infinitely set. + * + * Up to \p buflen characters may be written in buffer \p buf. + * + * If \p buflen is 0, \p buf may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); + +/** \brief Stringify a bitmap into a newly allocated list string. + * + * \return -1 on error. + */ +HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); + +/** \brief Parse a list string and stores it in bitmap \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); + +/** \brief Stringify a bitmap in the taskset-specific format. + * + * The taskset command manipulates bitmap strings that contain a single + * (possible very long) hexadecimal number starting with 0x. + * + * Up to \p buflen characters may be written in buffer \p buf. + * + * If \p buflen is 0, \p buf may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); + +/** \brief Stringify a bitmap into a newly allocated taskset-specific string. + * + * \return -1 on error. + */ +HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); + +/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); + + +/* + * Building bitmaps. + */ + +/** \brief Empty the bitmap \p bitmap */ +HWLOC_DECLSPEC void hwloc_bitmap_zero(hwloc_bitmap_t bitmap); + +/** \brief Fill bitmap \p bitmap with all possible indexes (even if those objects don't exist or are otherwise unavailable) */ +HWLOC_DECLSPEC void hwloc_bitmap_fill(hwloc_bitmap_t bitmap); + +/** \brief Empty the bitmap \p bitmap and add bit \p id */ +HWLOC_DECLSPEC void hwloc_bitmap_only(hwloc_bitmap_t bitmap, unsigned id); + +/** \brief Fill the bitmap \p and clear the index \p id */ +HWLOC_DECLSPEC void hwloc_bitmap_allbut(hwloc_bitmap_t bitmap, unsigned id); + +/** \brief Setup bitmap \p bitmap from unsigned long \p mask */ +HWLOC_DECLSPEC void hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask); + +/** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */ +HWLOC_DECLSPEC void hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask); + + +/* + * Modifying bitmaps. + */ + +/** \brief Add index \p id in bitmap \p bitmap */ +HWLOC_DECLSPEC void hwloc_bitmap_set(hwloc_bitmap_t bitmap, unsigned id); + +/** \brief Add indexes from \p begin to \p end in bitmap \p bitmap. + * + * If \p end is \c -1, the range is infinite. + */ +HWLOC_DECLSPEC void hwloc_bitmap_set_range(hwloc_bitmap_t bitmap, unsigned begin, int end); + +/** \brief Replace \p i -th subset of bitmap \p bitmap with unsigned long \p mask */ +HWLOC_DECLSPEC void hwloc_bitmap_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask); + +/** \brief Remove index \p id from bitmap \p bitmap */ +HWLOC_DECLSPEC void hwloc_bitmap_clr(hwloc_bitmap_t bitmap, unsigned id); + +/** \brief Remove indexes from \p begin to \p end in bitmap \p bitmap. + * + * If \p end is \c -1, the range is infinite. + */ +HWLOC_DECLSPEC void hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, int end); + +/** \brief Keep a single index among those set in bitmap \p bitmap + * + * May be useful before binding so that the process does not + * have a chance of migrating between multiple logical CPUs + * in the original mask. + */ +HWLOC_DECLSPEC void hwloc_bitmap_singlify(hwloc_bitmap_t bitmap); + + +/* + * Consulting bitmaps. + */ + +/** \brief Convert the beginning part of bitmap \p bitmap into unsigned long \p mask */ +HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */ +HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure; + +/** \brief Test whether index \p id is part of bitmap \p bitmap */ +HWLOC_DECLSPEC int hwloc_bitmap_isset(hwloc_const_bitmap_t bitmap, unsigned id) __hwloc_attribute_pure; + +/** \brief Test whether bitmap \p bitmap is empty */ +HWLOC_DECLSPEC int hwloc_bitmap_iszero(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Test whether bitmap \p bitmap is completely full + * + * \note A full bitmap is always infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the first index (least significant bit) in bitmap \p bitmap + * + * \return -1 if no index is set in \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev + * + * If \p prev is -1, the first index is returned. + * + * \return -1 if no index with higher index is set in \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; + +/** \brief Compute the last index (most significant bit) in bitmap \p bitmap + * + * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the "weight" of bitmap \p bitmap (i.e., number of + * indexes that are in the bitmap). + * + * \return the number of indexes that are in the bitmap. + * + * \return -1 if \p bitmap is infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Loop macro iterating on bitmap \p bitmap + * + * The loop must start with hwloc_bitmap_foreach_begin() and end + * with hwloc_bitmap_foreach_end() followed by a terminating ';'. + * + * \p index is the loop variable; it should be an unsigned int. The + * first iteration will set \p index to the lowest index in the bitmap. + * Successive iterations will iterate through, in order, all remaining + * indexes set in the bitmap. To be specific: each iteration will return a + * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true. + * + * The assert prevents the loop from being infinite if the bitmap is infinitely set. + * + * \hideinitializer + */ +#define hwloc_bitmap_foreach_begin(id, bitmap) \ +do { \ + assert(hwloc_bitmap_weight(bitmap) != -1); \ + for (id = hwloc_bitmap_first(bitmap); \ + (unsigned) id != (unsigned) -1; \ + id = hwloc_bitmap_next(bitmap, id)) { + +/** \brief End of loop macro iterating on a bitmap. + * + * Needs a terminating ';'. + * + * \sa hwloc_bitmap_foreach_begin() + * \hideinitializer + */ +#define hwloc_bitmap_foreach_end() \ + } \ +} while (0) + + +/* + * Combining bitmaps. + */ + +/** \brief Or bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res + * + * \p res can be the same as \p bitmap1 or \p bitmap2 + */ +HWLOC_DECLSPEC void hwloc_bitmap_or (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2); + +/** \brief And bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res + * + * \p res can be the same as \p bitmap1 or \p bitmap2 + */ +HWLOC_DECLSPEC void hwloc_bitmap_and (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2); + +/** \brief And bitmap \p bitmap1 and the negation of \p bitmap2 and store the result in bitmap \p res + * + * \p res can be the same as \p bitmap1 or \p bitmap2 + */ +HWLOC_DECLSPEC void hwloc_bitmap_andnot (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2); + +/** \brief Xor bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res + * + * \p res can be the same as \p bitmap1 or \p bitmap2 + */ +HWLOC_DECLSPEC void hwloc_bitmap_xor (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2); + +/** \brief Negate bitmap \p bitmap and store the result in bitmap \p res + * + * \p res can be the same as \p bitmap + */ +HWLOC_DECLSPEC void hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap); + + +/* + * Comparing bitmaps. + */ + +/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersects */ +HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; + +/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap. + * + * \note The empty bitmap is considered included in any other bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_isincluded (hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) __hwloc_attribute_pure; + +/** \brief Test whether bitmap \p bitmap1 is equal to bitmap \p bitmap2 */ +HWLOC_DECLSPEC int hwloc_bitmap_isequal (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; + +/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 using their lowest index. + * + * Smaller least significant bit is smaller. + * The empty bitmap is considered higher than anything. + */ +HWLOC_DECLSPEC int hwloc_bitmap_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; + +/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 in lexicographic order. + * + * Lexicographic comparison of bitmaps, starting for their highest indexes. + * Compare last indexes first, then second, etc. + * The empty bitmap is considered lower than anything. + * + * \note This is different from the non-existing hwloc_bitmap_compare_last() + * which would only compare the highest index of each bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_BITMAP_H */ diff --git a/rocrtst/thirdparty/include/hwloc/cpuset.h b/rocrtst/thirdparty/include/hwloc/cpuset.h new file mode 100644 index 0000000000..1156b6e858 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/cpuset.h @@ -0,0 +1,75 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2010 inria. All rights reserved. + * Copyright © 2009-2010, 2013 Université Bordeaux 1 + * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief The old deprecated Cpuset API. + * This interface should not be used anymore, it will be dropped in a later release. + * + * hwloc/bitmap.h should be used instead. Most hwloc_cpuset_foo functions are + * replaced with hwloc_bitmap_foo. The only exceptions are: + * - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf + * - hwloc_cpuset_cpu -> hwloc_bitmap_only + * - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut + */ + +#ifndef HWLOC_CPUSET_H +#define HWLOC_CPUSET_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "hwloc/bitmap.h" + +static __hwloc_inline hwloc_bitmap_t __hwloc_attribute_deprecated hwloc_cpuset_alloc(void) { return hwloc_bitmap_alloc(); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_free(hwloc_bitmap_t bitmap) { hwloc_bitmap_free(bitmap); } +static __hwloc_inline hwloc_bitmap_t __hwloc_attribute_deprecated hwloc_cpuset_dup(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_dup(bitmap); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src) { hwloc_bitmap_copy(dst, src); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_snprintf(buf, buflen, bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_asprintf(strp, bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_from_string(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string) { return hwloc_bitmap_sscanf(bitmap, string); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_zero(hwloc_bitmap_t bitmap) { hwloc_bitmap_zero(bitmap); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_fill(hwloc_bitmap_t bitmap) { hwloc_bitmap_fill(bitmap); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask) { hwloc_bitmap_from_ulong(bitmap, mask); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask) { hwloc_bitmap_from_ith_ulong(bitmap, i, mask); } +static __hwloc_inline unsigned __hwloc_attribute_deprecated long hwloc_cpuset_to_ulong(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_to_ulong(bitmap); } +static __hwloc_inline unsigned __hwloc_attribute_deprecated long hwloc_cpuset_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) { return hwloc_bitmap_to_ith_ulong(bitmap, i); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_cpu(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_only(bitmap, index_); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_all_but_cpu(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_allbut(bitmap, index_); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_set(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_set(bitmap, index_); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_set_range(hwloc_bitmap_t bitmap, unsigned begin, unsigned end) { hwloc_bitmap_set_range(bitmap, begin, end); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask) { hwloc_bitmap_set_ith_ulong(bitmap, i, mask); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_clr(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_clr(bitmap, index_); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_clr_range(hwloc_bitmap_t bitmap, unsigned begin, unsigned end) { hwloc_bitmap_clr_range(bitmap, begin, end); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isset(hwloc_const_bitmap_t bitmap, unsigned index_) { return hwloc_bitmap_isset(bitmap, index_); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_iszero(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_iszero(bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isfull(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_isfull(bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isequal(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_isequal(bitmap1, bitmap2); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_intersects(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_intersects(bitmap1, bitmap2); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isincluded(hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) { return hwloc_bitmap_isincluded(sub_bitmap, super_bitmap); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_or(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_or(res, bitmap1, bitmap2); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_and(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_and(res, bitmap1, bitmap2); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_andnot(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_andnot(res, bitmap1, bitmap2); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_xor(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_xor(res, bitmap1, bitmap2); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_not(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap) { hwloc_bitmap_not(res, bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_first(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_first(bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_last(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_last(bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_next(hwloc_const_bitmap_t bitmap, unsigned prev) { return hwloc_bitmap_next(bitmap, prev); } +static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_singlify(hwloc_bitmap_t bitmap) { hwloc_bitmap_singlify(bitmap); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_compare_first(bitmap1, bitmap2); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_compare(bitmap1, bitmap2); } +static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_weight(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_weight(bitmap); } + +#define hwloc_cpuset_foreach_begin hwloc_bitmap_foreach_begin +#define hwloc_cpuset_foreach_end hwloc_bitmap_foreach_end + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* HWLOC_CPUSET_H */ diff --git a/rocrtst/thirdparty/include/hwloc/cuda.h b/rocrtst/thirdparty/include/hwloc/cuda.h new file mode 100644 index 0000000000..20a3ac11ae --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/cuda.h @@ -0,0 +1,224 @@ +/* + * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the CUDA Driver API. + * + * Applications that use both hwloc and the CUDA Driver API may want to + * include this file so as to get topology information for CUDA devices. + * + */ + +#ifndef HWLOC_CUDA_H +#define HWLOC_CUDA_H + +#include +#include +#include +#ifdef HWLOC_LINUX_SYS +#include +#endif + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API + * + * This interface offers ways to retrieve topology information about + * CUDA devices when using the CUDA Driver API. + * + * @{ + */ + +/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice. + * + * Device \p cudevice must match the local machine. + */ +static __hwloc_inline int +hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, + CUdevice cudevice, int *domain, int *bus, int *dev) +{ + CUresult cres; + +#if CUDA_VERSION >= 4000 + cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice); + if (cres != CUDA_SUCCESS) { + errno = ENOSYS; + return -1; + } +#else + *domain = 0; +#endif + cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice); + if (cres != CUDA_SUCCESS) { + errno = ENOSYS; + return -1; + } + cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice); + if (cres != CUDA_SUCCESS) { + errno = ENOSYS; + return -1; + } + + return 0; +} + +/** \brief Get the CPU set of logical processors that are physically + * close to device \p cudevice. + * + * Return the CPU set describing the locality of the CUDA device \p cudevice. + * + * Topology \p topology and device \p cudevice must match the local machine. + * I/O devices detection and the CUDA component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_cuda_get_device_osdev() + * and hwloc_cuda_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + CUdevice cudevice, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX]; + FILE *sysfile = NULL; + int domainid, busid, deviceid; + + if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid)) + return -1; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid); + sysfile = fopen(path, "r"); + if (!sysfile) + return -1; + + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + fclose(sysfile); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc PCI device object corresponding to the + * CUDA device \p cudevice. + * + * Return the PCI device object describing the CUDA device \p cudevice. + * Return NULL if there is none. + * + * Topology \p topology and device \p cudevice must match the local machine. + * I/O devices detection must be enabled in topology \p topology. + * The CUDA component is not needed in the topology. + */ +static __hwloc_inline hwloc_obj_t +hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice) +{ + int domain, bus, dev; + + if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev)) + return NULL; + + return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0); +} + +/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice. + * + * Return the hwloc OS device object that describes the given + * CUDA device \p cudevice. Return NULL if there is none. + * + * Topology \p topology and device \p cudevice must match the local machine. + * I/O devices detection and the CUDA component must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_cuda_get_device_cpuset(). + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer. + */ +static __hwloc_inline hwloc_obj_t +hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice) +{ + hwloc_obj_t osdev = NULL; + int domain, bus, dev; + + if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev)) + return NULL; + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + if (strncmp(osdev->name, "cuda", 4)) + continue; + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && (int) pcidev->attr->pcidev.domain == domain + && (int) pcidev->attr->pcidev.bus == bus + && (int) pcidev->attr->pcidev.dev == dev + && pcidev->attr->pcidev.func == 0) + return osdev; + } + + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to the + * CUDA device whose index is \p idx. + * + * Return the OS device object describing the CUDA device whose + * index is \p idx. Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the CUDA component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + * + * \note This function is identical to hwloc_cudart_get_device_osdev_by_index(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && !strncmp("cuda", osdev->name, 4) + && atoi(osdev->name + 4) == (int) idx) + return osdev; + } + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_CUDA_H */ diff --git a/rocrtst/thirdparty/include/hwloc/cudart.h b/rocrtst/thirdparty/include/hwloc/cudart.h new file mode 100644 index 0000000000..bbf50794a5 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/cudart.h @@ -0,0 +1,184 @@ +/* + * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the CUDA Runtime API. + * + * Applications that use both hwloc and the CUDA Runtime API may want to + * include this file so as to get topology information for CUDA devices. + * + */ + +#ifndef HWLOC_CUDART_H +#define HWLOC_CUDART_H + +#include +#include +#include +#ifdef HWLOC_LINUX_SYS +#include +#endif + +#include /* for CUDA_VERSION */ +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API + * + * This interface offers ways to retrieve topology information about + * CUDA devices when using the CUDA Runtime API. + * + * @{ + */ + +/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx. + * + * Device index \p idx must match the local machine. + */ +static __hwloc_inline int +hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, + int idx, int *domain, int *bus, int *dev) +{ + cudaError_t cerr; + struct cudaDeviceProp prop; + + cerr = cudaGetDeviceProperties(&prop, idx); + if (cerr) { + errno = ENOSYS; + return -1; + } + +#if CUDA_VERSION >= 4000 + *domain = prop.pciDomainID; +#else + *domain = 0; +#endif + + *bus = prop.pciBusID; + *dev = prop.pciDeviceID; + + return 0; +} + +/** \brief Get the CPU set of logical processors that are physically + * close to device \p idx. + * + * Return the CPU set describing the locality of the CUDA device + * whose index is \p idx. + * + * Topology \p topology and device \p idx must match the local machine. + * I/O devices detection and the CUDA component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_cudart_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + int idx, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX]; + FILE *sysfile = NULL; + int domain, bus, dev; + + if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev)) + return -1; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", (unsigned) domain, (unsigned) bus, (unsigned) dev); + sysfile = fopen(path, "r"); + if (!sysfile) + return -1; + + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + fclose(sysfile); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc PCI device object corresponding to the + * CUDA device whose index is \p idx. + * + * Return the PCI device object describing the CUDA device whose + * index is \p idx. Return NULL if there is none. + * + * Topology \p topology and device \p idx must match the local machine. + * I/O devices detection must be enabled in topology \p topology. + * The CUDA component is not needed in the topology. + */ +static __hwloc_inline hwloc_obj_t +hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx) +{ + int domain, bus, dev; + + if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev)) + return NULL; + + return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0); +} + +/** \brief Get the hwloc OS device object corresponding to the + * CUDA device whose index is \p idx. + * + * Return the OS device object describing the CUDA device whose + * index is \p idx. Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the CUDA component must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_cudart_get_device_cpuset(). + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + * + * \note This function is identical to hwloc_cuda_get_device_osdev_by_index(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && !strncmp("cuda", osdev->name, 4) + && atoi(osdev->name + 4) == (int) idx) + return osdev; + } + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_CUDART_H */ diff --git a/rocrtst/thirdparty/include/hwloc/deprecated.h b/rocrtst/thirdparty/include/hwloc/deprecated.h new file mode 100644 index 0000000000..ac42c13de0 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/deprecated.h @@ -0,0 +1,102 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** + * This file contains the inline code of functions declared in hwloc.h + */ + +#ifndef HWLOC_DEPRECATED_H +#define HWLOC_DEPRECATED_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* backward compat with v1.10 before Socket->Package renaming */ +#define HWLOC_OBJ_SOCKET HWLOC_OBJ_PACKAGE +/* backward compat with v1.10 before Node->NUMANode clarification */ +#define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE + +/** \brief Return an object type from the string + * + * \return -1 if unrecognized. + */ +HWLOC_DECLSPEC hwloc_obj_type_t hwloc_obj_type_of_string (const char * string) __hwloc_attribute_pure __hwloc_attribute_deprecated; + +/** \brief Stringify a given topology object into a human-readable form. + * + * \note This function is deprecated in favor of hwloc_obj_type_snprintf() + * and hwloc_obj_attr_snprintf() since it is not very flexible and + * only prints physical/OS indexes. + * + * Fill string \p string up to \p size characters with the description + * of topology object \p obj in topology \p topology. + * + * If \p verbose is set, a longer description is used. Otherwise a + * short description is used. + * + * \p indexprefix is used to prefix the \p os_index attribute number of + * the object in the description. If \c NULL, the \c # character is used. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_snprintf(char * __hwloc_restrict string, size_t size, + hwloc_topology_t topology, hwloc_obj_t obj, + const char * __hwloc_restrict indexprefix, int verbose) __hwloc_attribute_deprecated; + +/** \brief Distribute \p n items over the topology under \p root + * + * Array \p cpuset will be filled with \p n cpusets recursively distributed + * linearly over the topology under \p root, down to depth \p until (which can + * be INT_MAX to distribute down to the finest level). + * + * This is typically useful when an application wants to distribute \p n + * threads over a machine, giving each of them as much private cache as + * possible and keeping them locally in number order. + * + * The caller may typically want to also call hwloc_bitmap_singlify() + * before binding a thread so that it does not move at all. + * + * \note This function requires the \p root object to have a CPU set. + */ +static __hwloc_inline void +hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *set, unsigned n, unsigned until) __hwloc_attribute_deprecated; +static __hwloc_inline void +hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *set, unsigned n, unsigned until) +{ + hwloc_distrib(topology, &root, 1, set, n, until, 0); +} + +/** \brief Distribute \p n items over the topology under \p roots + * + * This is the same as hwloc_distribute(), but takes an array of roots instead of + * just one root. + * + * \note This function requires the \p roots objects to have a CPU set. + */ +static __hwloc_inline void +hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *set, unsigned n, unsigned until) __hwloc_attribute_deprecated; +static __hwloc_inline void +hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *set, unsigned n, unsigned until) +{ + hwloc_distrib(topology, roots, n_roots, set, n, until, 0); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_DEPRECATED_H */ diff --git a/rocrtst/thirdparty/include/hwloc/diff.h b/rocrtst/thirdparty/include/hwloc/diff.h new file mode 100644 index 0000000000..01e3678083 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/diff.h @@ -0,0 +1,299 @@ +/* + * Copyright © 2013-2016 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Topology differences. + */ + +#ifndef HWLOC_DIFF_H +#define HWLOC_DIFF_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + + +/** \defgroup hwlocality_diff Topology differences + * + * Applications that manipulate many similar topologies, for instance + * one for each node of a homogeneous cluster, may want to compress + * topologies to reduce the memory footprint. + * + * This file offers a way to manipulate the difference between topologies + * and export/import it to/from XML. + * Compression may therefore be achieved by storing one topology + * entirely while the others are only described by their differences + * with the former. + * The actual topology can be reconstructed when actually needed by + * applying the precomputed difference to the reference topology. + * + * This interface targets very similar nodes. + * Only very simple differences between topologies are actually + * supported, for instance a change in the memory size, the name + * of the object, or some info attribute. + * More complex differences such as adding or removing objects cannot + * be represented in the difference structures and therefore return + * errors. + * + * It means that there is no need to apply the difference when + * looking at the tree organization (how many levels, how many + * objects per level, what kind of objects, CPU and node sets, etc) + * and when binding to objects. + * However the difference must be applied when looking at object + * attributes such as the name, the memory size or info attributes. + * + * @{ + */ + + +/** \brief Type of one object attribute difference. + */ +typedef enum hwloc_topology_diff_obj_attr_type_e { + /** \brief The object local memory is modified. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_uint64_s + * (and the index field is ignored). + */ + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE, + + /** \brief The object name is modified. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s + * (and the name field is ignored). + */ + + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME, + /** \brief the value of an info attribute is modified. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s. + */ + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO +} hwloc_topology_diff_obj_attr_type_t; + +/** \brief One object attribute difference. + */ +union hwloc_topology_diff_obj_attr_u { + struct hwloc_topology_diff_obj_attr_generic_s { + /* each part of the union must start with these */ + hwloc_topology_diff_obj_attr_type_t type; + } generic; + + /** \brief Integer attribute modification with an optional index. */ + struct hwloc_topology_diff_obj_attr_uint64_s { + /* used for storing integer attributes */ + hwloc_topology_diff_obj_attr_type_t type; + hwloc_uint64_t index; /* not used for SIZE */ + hwloc_uint64_t oldvalue; + hwloc_uint64_t newvalue; + } uint64; + + /** \brief String attribute modification with an optional name */ + struct hwloc_topology_diff_obj_attr_string_s { + /* used for storing name and info pairs */ + hwloc_topology_diff_obj_attr_type_t type; + char *name; /* not used for NAME */ + char *oldvalue; + char *newvalue; + } string; +}; + + +/** \brief Type of one element of a difference list. + */ +typedef enum hwloc_topology_diff_type_e { + /** \brief An object attribute was changed. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s. + */ + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR, + + /** \brief The difference is too complex, + * it cannot be represented. The difference below + * this object has not been checked. + * hwloc_topology_diff_build() will return 1. + * + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s. + */ + HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX +} hwloc_topology_diff_type_t; + +/** \brief One element of a difference list between two topologies. + */ +typedef union hwloc_topology_diff_u { + struct hwloc_topology_diff_generic_s { + /* each part of the union must start with these */ + hwloc_topology_diff_type_t type; + union hwloc_topology_diff_u * next; /* pointer to the next element of the list, or NULL */ + } generic; + + /* A difference in an object attribute. */ + struct hwloc_topology_diff_obj_attr_s { + hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */ + union hwloc_topology_diff_u * next; + /* List of attribute differences for a single object */ + unsigned obj_depth; + unsigned obj_index; + union hwloc_topology_diff_obj_attr_u diff; + } obj_attr; + + /* A difference that is too complex. */ + struct hwloc_topology_diff_too_complex_s { + hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */ + union hwloc_topology_diff_u * next; + /* Where we had to stop computing the diff in the first topology */ + unsigned obj_depth; + unsigned obj_index; + } too_complex; +} * hwloc_topology_diff_t; + + +/** \brief Compute the difference between 2 topologies. + * + * The difference is stored as a list of ::hwloc_topology_diff_t entries + * starting at \p diff. + * It is computed by doing a depth-first traversal of both topology trees + * simultaneously. + * + * If the difference between 2 objects is too complex to be represented + * (for instance if some objects have different types, or different numbers + * of children), a special diff entry of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX + * is queued. + * The computation of the diff does not continue below these objects. + * So each such diff entry means that the difference between two subtrees + * could not be computed. + * + * \return 0 if the difference can be represented properly. + * + * \return 0 with \p diff pointing to NULL if there is no difference + * between the topologies. + * + * \return 1 if the difference is too complex (see above). Some entries in + * the list will be of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX. + * + * \return -1 on any other error. + * + * \note \p flags is currently not used. It should be 0. + * + * \note The output diff has to be freed with hwloc_topology_diff_destroy(). + * + * \note The output diff can only be exported to XML or passed to + * hwloc_topology_diff_apply() if 0 was returned, i.e. if no entry of type + * ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed. + * + * \note The output diff may be modified by removing some entries from + * the list. The removed entries should be freed by passing them to + * to hwloc_topology_diff_destroy() (possible as another list). +*/ +HWLOC_DECLSPEC int hwloc_topology_diff_build(hwloc_topology_t topology, hwloc_topology_t newtopology, unsigned long flags, hwloc_topology_diff_t *diff); + +/** \brief Flags to be given to hwloc_topology_diff_apply(). + */ +enum hwloc_topology_diff_apply_flags_e { + /** \brief Apply topology diff in reverse direction. + * \hideinitializer + */ + HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE = (1UL<<0) +}; + +/** \brief Apply a topology diff to an existing topology. + * + * \p flags is an OR'ed set of ::hwloc_topology_diff_apply_flags_e. + * + * The new topology is modified in place. hwloc_topology_dup() + * may be used to duplicate it before patching. + * + * If the difference cannot be applied entirely, all previous applied + * elements are unapplied before returning. + * + * \return 0 on success. + * + * \return -N if applying the difference failed while trying + * to apply the N-th part of the difference. For instance -1 + * is returned if the very first difference element could not + * be applied. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags); + +/** \brief Destroy a list of topology differences. + * + * \note The \p topology parameter must be a valid topology + * but it is not required that it is related to \p diff. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_t topology, hwloc_topology_diff_t diff); + +/** \brief Load a list of topology differences from a XML file. + * + * If not \c NULL, \p refname will be filled with the identifier + * string of the reference topology for the difference file, + * if any was specified in the XML file. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * + * \note The \p topology parameter must be a valid topology + * but it is not required that it is related to \p diff. + * + * \note the pointer returned in refname should later be freed + * by the caller. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(hwloc_topology_t topology, const char *xmlpath, hwloc_topology_diff_t *diff, char **refname); + +/** \brief Export a list of topology differences to a XML file. + * + * If not \c NULL, \p refname defines an identifier string + * for the reference topology which was used as a base when + * computing this difference. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * This attribute is given back when reading the diff from XML. + * + * \note The \p topology parameter must be a valid topology + * but it is not required that it is related to \p diff. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_t topology, hwloc_topology_diff_t diff, const char *refname, const char *xmlpath); + +/** \brief Load a list of topology differences from a XML buffer. + * + * If not \c NULL, \p refname will be filled with the identifier + * string of the reference topology for the difference file, + * if any was specified in the XML file. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * + * \note The \p topology parameter must be a valid topology + * but it is not required that it is related to \p diff. + * + * \note the pointer returned in refname should later be freed + * by the caller. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(hwloc_topology_t topology, const char *xmlbuffer, int buflen, hwloc_topology_diff_t *diff, char **refname); + +/** \brief Export a list of topology differences to a XML buffer. + * + * If not \c NULL, \p refname defines an identifier string + * for the reference topology which was used as a base when + * computing this difference. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * This attribute is given back when reading the diff from XML. + * + * \note The XML buffer should later be freed with hwloc_free_xmlbuffer(). + * + * \note The \p topology parameter must be a valid topology + * but it is not required that it is related to \p diff. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_t topology, hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen); + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_DIFF_H */ diff --git a/rocrtst/thirdparty/include/hwloc/gl.h b/rocrtst/thirdparty/include/hwloc/gl.h new file mode 100644 index 0000000000..4b8b3f2308 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/gl.h @@ -0,0 +1,135 @@ +/* + * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. + * Copyright © 2012-2013 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and OpenGL displays. + * + * Applications that use both hwloc and OpenGL may want to include + * this file so as to get topology information for OpenGL displays. + */ + +#ifndef HWLOC_GL_H +#define HWLOC_GL_H + +#include + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_gl Interoperability with OpenGL displays + * + * This interface offers ways to retrieve topology information about + * OpenGL displays. + * + * Only the NVIDIA display locality information is currently available, + * using the NV-CONTROL X11 extension and the NVCtrl library. + * + * @{ + */ + +/** \brief Get the hwloc OS device object corresponding to the + * OpenGL display given by port and device index. + * + * Return the OS device object describing the OpenGL display + * whose port (server) is \p port and device (screen) is \p device. + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the GL component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology, + unsigned port, unsigned device) +{ + unsigned x = (unsigned) -1, y = (unsigned) -1; + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name + && sscanf(osdev->name, ":%u.%u", &x, &y) == 2 + && port == x && device == y) + return osdev; + } + errno = EINVAL; + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to the + * OpenGL display given by name. + * + * Return the OS device object describing the OpenGL display + * whose name is \p name, built as ":port.device" such as ":0.0" . + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the GL component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology, + const char *name) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name + && !strcmp(name, osdev->name)) + return osdev; + } + errno = EINVAL; + return NULL; +} + +/** \brief Get the OpenGL display port and device corresponding + * to the given hwloc OS object. + * + * Return the OpenGL display port (server) in \p port and device (screen) + * in \p screen that correspond to the given hwloc OS device object. + * Return \c -1 if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the GL component must be enabled in the topology. + */ +static __hwloc_inline int +hwloc_gl_get_display_by_osdev(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_obj_t osdev, + unsigned *port, unsigned *device) +{ + unsigned x = -1, y = -1; + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && sscanf(osdev->name, ":%u.%u", &x, &y) == 2) { + *port = x; + *device = y; + return 0; + } + errno = EINVAL; + return -1; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_GL_H */ + diff --git a/rocrtst/thirdparty/include/hwloc/glibc-sched.h b/rocrtst/thirdparty/include/hwloc/glibc-sched.h new file mode 100644 index 0000000000..1f9ba7cddc --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/glibc-sched.h @@ -0,0 +1,125 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2013 inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and glibc scheduling routines. + * + * Applications that use both hwloc and glibc scheduling routines such as + * sched_getaffinity() or pthread_attr_setaffinity_np() may want to include + * this file so as to ease conversion between their respective types. + */ + +#ifndef HWLOC_GLIBC_SCHED_H +#define HWLOC_GLIBC_SCHED_H + +#include +#include +#include + +#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) +#error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef HWLOC_HAVE_CPU_SET + + +/** \defgroup hwlocality_glibc_sched Interoperability with glibc sched affinity + * + * This interface offers ways to convert between hwloc cpusets and glibc cpusets + * such as those manipulated by sched_getaffinity() or pthread_attr_setaffinity_np(). + * + * \note Topology \p topology must match the current machine. + * + * @{ + */ + + +/** \brief Convert hwloc CPU set \p toposet into glibc sched affinity CPU set \p schedset + * + * This function may be used before calling sched_setaffinity or any other function + * that takes a cpu_set_t as input parameter. + * + * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC + */ +static __hwloc_inline int +hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset, + cpu_set_t *schedset, size_t schedsetsize) +{ +#ifdef CPU_ZERO_S + unsigned cpu; + CPU_ZERO_S(schedsetsize, schedset); + hwloc_bitmap_foreach_begin(cpu, hwlocset) + CPU_SET_S(cpu, schedsetsize, schedset); + hwloc_bitmap_foreach_end(); +#else /* !CPU_ZERO_S */ + unsigned cpu; + CPU_ZERO(schedset); + assert(schedsetsize == sizeof(cpu_set_t)); + hwloc_bitmap_foreach_begin(cpu, hwlocset) + CPU_SET(cpu, schedset); + hwloc_bitmap_foreach_end(); +#endif /* !CPU_ZERO_S */ + return 0; +} + +/** \brief Convert glibc sched affinity CPU set \p schedset into hwloc CPU set + * + * This function may be used before calling sched_setaffinity or any other function + * that takes a cpu_set_t as input parameter. + * + * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC + */ +static __hwloc_inline int +hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset, + const cpu_set_t *schedset, size_t schedsetsize) +{ + int cpu; +#ifdef CPU_ZERO_S + int count; +#endif + hwloc_bitmap_zero(hwlocset); +#ifdef CPU_ZERO_S + count = CPU_COUNT_S(schedsetsize, schedset); + cpu = 0; + while (count) { + if (CPU_ISSET_S(cpu, schedsetsize, schedset)) { + hwloc_bitmap_set(hwlocset, cpu); + count--; + } + cpu++; + } +#else /* !CPU_ZERO_S */ + /* sched.h does not support dynamic cpu_set_t (introduced in glibc 2.7), + * assume we have a very old interface without CPU_COUNT (added in 2.6) + */ + assert(schedsetsize == sizeof(cpu_set_t)); + for(cpu=0; cpu +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set + * @{ + */ + +/** \brief Get the first largest object included in the given cpuset \p set. + * + * \return the first object that is included in \p set and whose parent is not. + * + * This is convenient for iterating over all largest objects within a CPU set + * by doing a loop getting the first largest object and clearing its CPU set + * from the remaining CPU set. + * + * \note This function cannot work if the root object does not have a CPU set, + * e.g. if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + hwloc_obj_t obj = hwloc_get_root_obj(topology); + if (!obj->cpuset || !hwloc_bitmap_intersects(obj->cpuset, set)) + return NULL; + while (!hwloc_bitmap_isincluded(obj->cpuset, set)) { + /* while the object intersects without being included, look at its children */ + hwloc_obj_t child = obj->first_child; + while (child) { + if (child->cpuset && hwloc_bitmap_intersects(child->cpuset, set)) + break; + child = child->next_sibling; + } + if (!child) + /* no child intersects, return their father */ + return obj; + /* found one intersecting child, look at its children */ + obj = child; + } + /* obj is included, return it */ + return obj; +} + +/** \brief Get the set of largest objects covering exactly a given cpuset \p set + * + * \return the number of objects returned in \p objs. + * + * \note This function cannot work if the root object does not have a CPU set, + * e.g. if the topology is made of different machines. + */ +HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_t * __hwloc_restrict objs, int max); + +/** \brief Return the next object at depth \p depth included in CPU set \p set. + * + * If \p prev is \c NULL, return the first object at depth \p depth + * included in \p set. The next invokation should pass the previous + * return value in \p prev so as to obtain the next object in \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + unsigned depth, hwloc_obj_t prev) +{ + hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); + if (!next || !next->cpuset) + return NULL; + while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set))) + next = next->next_cousin; + return next; +} + +/** \brief Return the next object of type \p type included in CPU set \p set. + * + * If there are multiple or no depth for given type, return \c NULL + * and let the caller fallback to + * hwloc_get_next_obj_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev); +} + +/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + unsigned depth, unsigned idx) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + unsigned depth, unsigned idx) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); + unsigned count = 0; + if (!obj || !obj->cpuset) + return NULL; + while (obj) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) { + if (count == idx) + return obj; + count++; + } + obj = obj->next_cousin; + } + return NULL; +} + +/** \brief Return the \p idx -th object of type \p type included in CPU set \p set. + * + * If there are multiple or no depth for given type, return \c NULL + * and let the caller fallback to + * hwloc_get_obj_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, unsigned idx) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx); +} + +/** \brief Return the number of objects at depth \p depth included in CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline unsigned +hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + unsigned depth) __hwloc_attribute_pure; +static __hwloc_inline unsigned +hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + unsigned depth) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); + unsigned count = 0; + if (!obj || !obj->cpuset) + return 0; + while (obj) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) + count++; + obj = obj->next_cousin; + } + return count; +} + +/** \brief Return the number of objects of type \p type included in CPU set \p set. + * + * If no object for that type exists inside CPU set \p set, 0 is + * returned. If there are several levels with objects of that type + * inside CPU set \p set, -1 is returned. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline int +hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return 0; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return -1; /* FIXME: agregate nbobjs from different levels? */ + return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth); +} + +/** \brief Return the logical index among the objects included in CPU set \p set. + * + * Consult all objects in the same level as \p obj and inside CPU set \p set + * in the logical order, and return the index of \p obj within them. + * If \p set covers the entire topology, this is the logical index of \p obj. + * Otherwise, this is similar to a logical index within the part of the topology + * defined by CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + */ +static __hwloc_inline int +hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t obj) +{ + int idx = 0; + if (!hwloc_bitmap_isincluded(obj->cpuset, set)) + return -1; + /* count how many objects are inside the cpuset on the way from us to the beginning of the level */ + while ((obj = obj->prev_cousin) != NULL) + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) + idx++; + return idx; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set + * @{ + */ + +/** \brief Get the child covering at least CPU set \p set. + * + * \return \c NULL if no child matches or if \p set is empty. + * + * \note This function cannot work if parent does not have a CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t parent) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t parent) +{ + hwloc_obj_t child; + if (!parent->cpuset || hwloc_bitmap_iszero(set)) + return NULL; + child = parent->first_child; + while (child) { + if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset)) + return child; + child = child->next_sibling; + } + return NULL; +} + +/** \brief Get the lowest object covering at least CPU set \p set + * + * \return \c NULL if no object matches or if \p set is empty. + * + * \note This function cannot work if the root object does not have a CPU set, + * e.g. if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + struct hwloc_obj *current = hwloc_get_root_obj(topology); + if (hwloc_bitmap_iszero(set) || !current->cpuset || !hwloc_bitmap_isincluded(set, current->cpuset)) + return NULL; + while (1) { + hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current); + if (!child) + return current; + current = child; + } +} + +/** \brief Iterate through same-depth objects covering at least CPU set \p set + * + * If object \p prev is \c NULL, return the first object at depth \p + * depth covering at least part of CPU set \p set. The next + * invokation should pass the previous return value in \p prev so as + * to obtain the next object covering at least another part of \p set. + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set, + unsigned depth, hwloc_obj_t prev) +{ + hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); + if (!next || !next->cpuset) + return NULL; + while (next && !hwloc_bitmap_intersects(set, next->cpuset)) + next = next->next_cousin; + return next; +} + +/** \brief Iterate through same-type objects covering at least CPU set \p set + * + * If object \p prev is \c NULL, return the first object of type \p + * type covering at least part of CPU set \p set. The next invokation + * should pass the previous return value in \p prev so as to obtain + * the next object of type \p type covering at least another part of + * \p set. + * + * If there are no or multiple depths for type \p type, \c NULL is returned. + * The caller may fallback to hwloc_get_next_obj_covering_cpuset_by_depth() + * for each depth. + * + * \note This function cannot work if objects of the given type do + * not have CPU sets or if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev); +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Returns the ancestor object of \p obj at depth \p depth. */ +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj) +{ + hwloc_obj_t ancestor = obj; + if (obj->depth < depth) + return NULL; + while (ancestor && ancestor->depth > depth) + ancestor = ancestor->parent; + return ancestor; +} + +/** \brief Returns the ancestor object of \p obj with type \p type. */ +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) +{ + hwloc_obj_t ancestor = obj->parent; + while (ancestor && ancestor->type != type) + ancestor = ancestor->parent; + return ancestor; +} + +/** \brief Returns the common parent object to objects \p obj1 and \p obj2 */ +static __hwloc_inline hwloc_obj_t +hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + /* the loop isn't so easy since intermediate ancestors may have + * different depth, causing us to alternate between using obj1->parent + * and obj2->parent. Also, even if at some point we find ancestors of + * of the same depth, their ancestors may have different depth again. + */ + while (obj1 != obj2) { + while (obj1->depth > obj2->depth) + obj1 = obj1->parent; + while (obj2->depth > obj1->depth) + obj2 = obj2->parent; + if (obj1 != obj2 && obj1->depth == obj2->depth) { + obj1 = obj1->parent; + obj2 = obj2->parent; + } + } + return obj1; +} + +/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root. + * + * \note This function assumes that both \p obj and \p subtree_root have a \p cpuset. + */ +static __hwloc_inline int +hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) +{ + return hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset); +} + +/** \brief Return the next child. + * + * If \p prev is \c NULL, return the first child. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev) +{ + if (!prev) + return parent->first_child; + if (prev->parent != parent) + return NULL; + return prev->next_sibling; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_find_cache Looking at Cache Objects + * @{ + */ + +/** \brief Find the depth of cache objects matching cache depth and type. + * + * Return the depth of the topology level that contains cache objects + * whose attributes match \p cachedepth and \p cachetype. This function + * intends to disambiguate the case where hwloc_get_type_depth() returns + * ::HWLOC_TYPE_DEPTH_MULTIPLE. + * + * If no cache level matches, ::HWLOC_TYPE_DEPTH_UNKNOWN is returned. + * + * If \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED, the depth of the + * unique matching unified cache level is returned. + * + * If \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION, + * either a matching cache, or a unified cache is returned. + * + * If \p cachetype is \c -1, it is ignored and multiple levels may + * match. The function returns either the depth of a uniquely matching + * level or ::HWLOC_TYPE_DEPTH_MULTIPLE. + */ +static __hwloc_inline int +hwloc_get_cache_type_depth (hwloc_topology_t topology, + unsigned cachelevel, hwloc_obj_cache_type_t cachetype) +{ + int depth; + int found = HWLOC_TYPE_DEPTH_UNKNOWN; + for (depth=0; ; depth++) { + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); + if (!obj) + break; + if (obj->type != HWLOC_OBJ_CACHE || obj->attr->cache.depth != cachelevel) + /* doesn't match, try next depth */ + continue; + if (cachetype == (hwloc_obj_cache_type_t) -1) { + if (found != HWLOC_TYPE_DEPTH_UNKNOWN) { + /* second match, return MULTIPLE */ + return HWLOC_TYPE_DEPTH_MULTIPLE; + } + /* first match, mark it as found */ + found = depth; + continue; + } + if (obj->attr->cache.type == cachetype || obj->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED) + /* exact match (either unified is alone, or we match instruction or data), return immediately */ + return depth; + } + /* went to the bottom, return what we found */ + return found; +} + +/** \brief Get the first cache covering a cpuset \p set + * + * \return \c NULL if no cache matches. + * + * \note This function cannot work if the root object does not have a CPU set, + * e.g. if the topology is made of different machines. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + hwloc_obj_t current = hwloc_get_obj_covering_cpuset(topology, set); + while (current) { + if (current->type == HWLOC_OBJ_CACHE) + return current; + current = current->parent; + } + return NULL; +} + +/** \brief Get the first cache shared between an object and somebody else. + * + * \return \c NULL if no cache matches or if an invalid object is given. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) +{ + hwloc_obj_t current = obj->parent; + if (!obj->cpuset) + return NULL; + while (current && current->cpuset) { + if (!hwloc_bitmap_isequal(current->cpuset, obj->cpuset) + && current->type == HWLOC_OBJ_CACHE) + return current; + current = current->parent; + } + return NULL; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_find_misc Finding objects, miscellaneous helpers + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index. + * + * This function is useful for converting a CPU set into the PU + * objects it contains. + * When retrieving the current binding (e.g. with hwloc_get_cpubind()), + * one may iterate over the bits of the resulting CPU set with + * hwloc_bitmap_foreach_begin(), and find the corresponding PUs + * with this function. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, obj)) != NULL) + if (obj->os_index == os_index) + return obj; + return NULL; +} + +/** \brief Returns the object of type ::HWLOC_OBJ_NUMANODE with \p os_index. + * + * This function is useful for converting a nodeset into the NUMA node + * objects it contains. + * When retrieving the current binding (e.g. with hwloc_get_membind_nodeset()), + * one may iterate over the bits of the resulting nodeset with + * hwloc_bitmap_foreach_begin(), and find the corresponding NUMA nodes + * with this function. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL) + if (obj->os_index == os_index) + return obj; + return NULL; +} + +/** \brief Do a depth-first traversal of the topology to find and sort + * + * all objects that are at the same depth than \p src. + * Report in \p objs up to \p max physically closest ones to \p src. + * + * \return the number of objects returned in \p objs. + * + * \return 0 if \p src is an I/O object. + * + * \note This function requires the \p src object to have a CPU set. + */ +/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */ +HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max); + +/** \brief Find an object below another object, both specified by types and indexes. + * + * Start from the top system object and find object of type \p type1 + * and logical index \p idx1. Then look below this object and find another + * object of type \p type2 and logical index \p idx2. Indexes are specified + * within the parent, not withing the entire system. + * + * For instance, if type1 is PACKAGE, idx1 is 2, type2 is CORE and idx2 + * is 3, return the fourth core object below the third package. + * + * \note This function requires these objects to have a CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_by_type (hwloc_topology_t topology, + hwloc_obj_type_t type1, unsigned idx1, + hwloc_obj_type_t type2, unsigned idx2) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_by_type (hwloc_topology_t topology, + hwloc_obj_type_t type1, unsigned idx1, + hwloc_obj_type_t type2, unsigned idx2) +{ + hwloc_obj_t obj; + obj = hwloc_get_obj_by_type (topology, type1, idx1); + if (!obj || !obj->cpuset) + return NULL; + return hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, type2, idx2); +} + +/** \brief Find an object below a chain of objects specified by types and indexes. + * + * This is a generalized version of hwloc_get_obj_below_by_type(). + * + * Arrays \p typev and \p idxv must contain \p nr types and indexes. + * + * Start from the top system object and walk the arrays \p typev and \p idxv. + * For each type and logical index couple in the arrays, look under the previously found + * object to find the index-th object of the given type. + * Indexes are specified within the parent, not withing the entire system. + * + * For instance, if nr is 3, typev contains NODE, PACKAGE and CORE, + * and idxv contains 0, 1 and 2, return the third core object below + * the second package below the first NUMA node. + * + * \note This function requires all these objects and the root object + * to have a CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) +{ + hwloc_obj_t obj = hwloc_get_root_obj(topology); + int i; + for(i=0; icpuset) + return NULL; + obj = hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, typev[i], idxv[i]); + } + return obj; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_distribute Distributing items over a topology + * @{ + */ + +/** \brief Flags to be given to hwloc_distrib(). + */ +enum hwloc_distrib_flags_e { + /** \brief Distrib in reverse order, starting from the last objects. + * \hideinitializer + */ + HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0) +}; + +/** \brief Distribute \p n items over the topology under \p roots + * + * Array \p set will be filled with \p n cpusets recursively distributed + * linearly over the topology under objects \p roots, down to depth \p until + * (which can be INT_MAX to distribute down to the finest level). + * + * \p n_roots is usually 1 and \p roots only contains the topology root object + * so as to distribute over the entire topology. + * + * This is typically useful when an application wants to distribute \p n + * threads over a machine, giving each of them as much private cache as + * possible and keeping them locally in number order. + * + * The caller may typically want to also call hwloc_bitmap_singlify() + * before binding a thread so that it does not move at all. + * + * \p flags should be 0 or a OR'ed set of ::hwloc_distrib_flags_e. + * + * \note This function requires the \p roots objects to have a CPU set. + * + * \note This function replaces the now deprecated hwloc_distribute() + * and hwloc_distributev() functions. + */ +static __hwloc_inline int +hwloc_distrib(hwloc_topology_t topology, + hwloc_obj_t *roots, unsigned n_roots, + hwloc_cpuset_t *set, + unsigned n, + unsigned until, unsigned long flags) +{ + unsigned i; + unsigned tot_weight; + unsigned given, givenweight; + hwloc_cpuset_t *cpusetp = set; + + if (flags & ~HWLOC_DISTRIB_FLAG_REVERSE) { + errno = EINVAL; + return -1; + } + + tot_weight = 0; + for (i = 0; i < n_roots; i++) + if (roots[i]->cpuset) + tot_weight += hwloc_bitmap_weight(roots[i]->cpuset); + + for (i = 0, given = 0, givenweight = 0; i < n_roots; i++) { + unsigned chunk, weight; + hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i]; + hwloc_cpuset_t cpuset = root->cpuset; + if (!cpuset) + continue; + weight = hwloc_bitmap_weight(cpuset); + if (!weight) + continue; + /* Give to root a chunk proportional to its weight. + * If previous chunks got rounded-up, we may get a bit less. */ + chunk = (( (givenweight+weight) * n + tot_weight-1) / tot_weight) + - (( givenweight * n + tot_weight-1) / tot_weight); + if (!root->arity || chunk <= 1 || root->depth >= until) { + /* We can't split any more, put everything there. */ + if (chunk) { + /* Fill cpusets with ours */ + unsigned j; + for (j=0; j < chunk; j++) + cpusetp[j] = hwloc_bitmap_dup(cpuset); + } else { + /* We got no chunk, just merge our cpuset to a previous one + * (the first chunk cannot be empty) + * so that this root doesn't get ignored. + */ + assert(given); + hwloc_bitmap_or(cpusetp[-1], cpusetp[-1], cpuset); + } + } else { + /* Still more to distribute, recurse into children */ + hwloc_distrib(topology, root->children, root->arity, cpusetp, chunk, until, flags); + } + cpusetp += chunk; + given += chunk; + givenweight += weight; + } + + return 0; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_topology_sets CPU and node sets of entire topologies + * @{ + */ +/** \brief Get complete CPU set + * + * \return the complete CPU set of logical processors of the system. If the + * topology is the result of a combination of several systems, NULL is + * returned. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->complete_cpuset; +} + +/** \brief Get topology CPU set + * + * \return the CPU set of logical processors of the system for which hwloc + * provides topology information. This is equivalent to the cpuset of the + * system object. If the topology is the result of a combination of several + * systems, NULL is returned. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->cpuset; +} + +/** \brief Get online CPU set + * + * \return the CPU set of online logical processors of the system. If the + * topology is the result of a combination of several systems, NULL is + * returned. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_online_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_online_cpuset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->online_cpuset; +} + +/** \brief Get allowed CPU set + * + * \return the CPU set of allowed logical processors of the system. If the + * topology is the result of a combination of several systems, NULL is + * returned. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed, hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_cpuset_t +hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->allowed_cpuset; +} + +/** \brief Get complete node set + * + * \return the complete node set of memory of the system. If the + * topology is the result of a combination of several systems, NULL is + * returned. + * + * \note The returned nodeset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_nodeset_t +hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_nodeset_t +hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->complete_nodeset; +} + +/** \brief Get topology node set + * + * \return the node set of memory of the system for which hwloc + * provides topology information. This is equivalent to the nodeset of the + * system object. If the topology is the result of a combination of several + * systems, NULL is returned. + * + * \note The returned nodeset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_nodeset_t +hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_nodeset_t +hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->nodeset; +} + +/** \brief Get allowed node set + * + * \return the node set of allowed memory of the system. If the + * topology is the result of a combination of several systems, NULL is + * returned. + * + * \note The returned nodeset is not newly allocated and should thus not be + * changed or freed, hwloc_bitmap_dup() must be used to obtain a local copy. + */ +static __hwloc_inline hwloc_const_nodeset_t +hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; +static __hwloc_inline hwloc_const_nodeset_t +hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->allowed_nodeset; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_nodeset_convert Converting between CPU sets and node sets + * + * There are two semantics for converting cpusets to nodesets depending on how + * non-NUMA machines are handled. + * + * When manipulating nodesets for memory binding, non-NUMA machines should be + * considered as having a single NUMA node. The standard conversion routines + * below should be used so that marking the first bit of the nodeset means + * that memory should be bound to a non-NUMA whole machine. + * + * When manipulating nodesets as an actual list of NUMA nodes without any + * need to handle memory binding on non-NUMA machines, the strict conversion + * routines may be used instead. + * @{ + */ + +/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases + * + * If some NUMA nodes have no CPUs at all, this function never sets their + * indexes in the output node set, even if a full CPU set is given in input. + * + * If the topology contains no NUMA nodes, the machine is considered + * as a single memory node, and the following behavior is used: + * If \p cpuset is empty, \p nodeset will be emptied as well. + * Otherwise \p nodeset will be entirely filled. + */ +static __hwloc_inline void +hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t obj; + + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { + if (hwloc_bitmap_iszero(_cpuset)) + hwloc_bitmap_zero(nodeset); + else + /* Assume the whole system */ + hwloc_bitmap_fill(nodeset); + return; + } + + hwloc_bitmap_zero(nodeset); + obj = NULL; + while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL) + hwloc_bitmap_set(nodeset, obj->os_index); +} + +/** \brief Convert a CPU set into a NUMA node set without handling non-NUMA cases + * + * This is the strict variant of hwloc_cpuset_to_nodeset(). It does not fix + * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly + * the same. However, if the topology contains no NUMA nodes, return an empty + * nodeset. + */ +static __hwloc_inline void +hwloc_cpuset_to_nodeset_strict(struct hwloc_topology *topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t obj; + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN ) + return; + hwloc_bitmap_zero(nodeset); + obj = NULL; + while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL) + hwloc_bitmap_set(nodeset, obj->os_index); +} + +/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases + * + * If the topology contains no NUMA nodes, the machine is considered + * as a single memory node, and the following behavior is used: + * If \p nodeset is empty, \p cpuset will be emptied as well. + * Otherwise \p cpuset will be entirely filled. + * This is useful for manipulating memory binding sets. + */ +static __hwloc_inline void +hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t obj; + + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN ) { + if (hwloc_bitmap_iszero(nodeset)) + hwloc_bitmap_zero(_cpuset); + else + /* Assume the whole system */ + hwloc_bitmap_fill(_cpuset); + return; + } + + hwloc_bitmap_zero(_cpuset); + obj = NULL; + while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) { + if (hwloc_bitmap_isset(nodeset, obj->os_index)) + /* no need to check obj->cpuset because objects in levels always have a cpuset */ + hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset); + } +} + +/** \brief Convert a NUMA node set into a CPU set without handling non-NUMA cases + * + * This is the strict variant of hwloc_cpuset_from_nodeset(). It does not fix + * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly + * the same. However, if the topology contains no NUMA nodes, return an empty + * cpuset. + */ +static __hwloc_inline void +hwloc_cpuset_from_nodeset_strict(struct hwloc_topology *topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t obj; + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN ) + return; + hwloc_bitmap_zero(_cpuset); + obj = NULL; + while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) + if (hwloc_bitmap_isset(nodeset, obj->os_index)) + /* no need to check obj->cpuset because objects in levels always have a cpuset */ + hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset); +} + +/** @} */ + + + +/** \defgroup hwlocality_distances Manipulating Distances + * @{ + */ + +/** \brief Get the distances between all objects at the given depth. + * + * \return a distances structure containing a matrix with all distances + * between all objects at the given depth. + * + * Slot i+nbobjs*j contains the distance from the object of logical index i + * the object of logical index j. + * + * \note This function only returns matrices covering the whole topology, + * without any unknown distance value. Those matrices are available in + * top-level object of the hierarchy. Matrices of lower objects are not + * reported here since they cover only part of the machine. + * + * The returned structure belongs to the hwloc library. The caller should + * not modify or free it. + * + * \return \c NULL if no such distance matrix exists. + */ + +static __hwloc_inline const struct hwloc_distances_s * +hwloc_get_whole_distance_matrix_by_depth(hwloc_topology_t topology, unsigned depth) +{ + hwloc_obj_t root = hwloc_get_root_obj(topology); + unsigned i; + for(i=0; idistances_count; i++) + if (root->distances[i]->relative_depth == depth) + return root->distances[i]; + return NULL; +} + +/** \brief Get the distances between all objects of a given type. + * + * \return a distances structure containing a matrix with all distances + * between all objects of the given type. + * + * Slot i+nbobjs*j contains the distance from the object of logical index i + * the object of logical index j. + * + * \note This function only returns matrices covering the whole topology, + * without any unknown distance value. Those matrices are available in + * top-level object of the hierarchy. Matrices of lower objects are not + * reported here since they cover only part of the machine. + * + * The returned structure belongs to the hwloc library. The caller should + * not modify or free it. + * + * \return \c NULL if no such distance matrix exists. + */ + +static __hwloc_inline const struct hwloc_distances_s * +hwloc_get_whole_distance_matrix_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth < 0) + return NULL; + return hwloc_get_whole_distance_matrix_by_depth(topology, depth); +} + +/** \brief Get distances for the given depth and covering some objects + * + * Return a distance matrix that describes depth \p depth and covers at + * least object \p obj and all its children. + * + * When looking for the distance between some objects, a common ancestor should + * be passed in \p obj. + * + * \p firstp is set to logical index of the first object described by the matrix. + * + * The returned structure belongs to the hwloc library. The caller should + * not modify or free it. + */ +static __hwloc_inline const struct hwloc_distances_s * +hwloc_get_distance_matrix_covering_obj_by_depth(hwloc_topology_t topology, + hwloc_obj_t obj, unsigned depth, + unsigned *firstp) +{ + while (obj && obj->cpuset) { + unsigned i; + for(i=0; idistances_count; i++) + if (obj->distances[i]->relative_depth == depth - obj->depth) { + if (!obj->distances[i]->nbobjs) + continue; + *firstp = hwloc_get_next_obj_inside_cpuset_by_depth(topology, obj->cpuset, depth, NULL)->logical_index; + return obj->distances[i]; + } + obj = obj->parent; + } + return NULL; +} + +/** \brief Get the latency in both directions between two objects. + * + * Look at ancestor objects from the bottom to the top until one of them + * contains a distance matrix that matches the objects exactly. + * + * \p latency gets the value from object \p obj1 to \p obj2, while + * \p reverse_latency gets the reverse-direction value, which + * may be different on some architectures. + * + * \return -1 if no ancestor contains a matching latency matrix. + */ +static __hwloc_inline int +hwloc_get_latency(hwloc_topology_t topology, + hwloc_obj_t obj1, hwloc_obj_t obj2, + float *latency, float *reverse_latency) +{ + hwloc_obj_t ancestor; + const struct hwloc_distances_s * distances; + unsigned first_logical ; + + if (obj1->depth != obj2->depth) { + errno = EINVAL; + return -1; + } + + ancestor = hwloc_get_common_ancestor_obj(topology, obj1, obj2); + distances = hwloc_get_distance_matrix_covering_obj_by_depth(topology, ancestor, obj1->depth, &first_logical); + if (distances && distances->latency) { + const float * latency_matrix = distances->latency; + unsigned nbobjs = distances->nbobjs; + unsigned l1 = obj1->logical_index - first_logical; + unsigned l2 = obj2->logical_index - first_logical; + *latency = latency_matrix[l1*nbobjs+l2]; + *reverse_latency = latency_matrix[l2*nbobjs+l1]; + return 0; + } + + errno = ENOSYS; + return -1; +} + +/** @} */ + + + +/** \defgroup hwlocality_advanced_io Finding I/O objects + * @{ + */ + +/** \brief Get the first non-I/O ancestor object. + * + * Given the I/O object \p ioobj, find the smallest non-I/O ancestor + * object. This regular object may then be used for binding because + * its locality is the same as \p ioobj. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_non_io_ancestor_obj(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_obj_t ioobj) +{ + hwloc_obj_t obj = ioobj; + while (obj && !obj->cpuset) { + obj = obj->parent; + } + return obj; +} + +/** \brief Get the next PCI device in the system. + * + * \return the first PCI device if \p prev is \c NULL. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev) +{ + return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, prev); +} + +/** \brief Find the PCI device object matching the PCI bus id + * given domain, bus device and function PCI bus id. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_pcidev_by_busid(hwloc_topology_t topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { + if (obj->attr->pcidev.domain == domain + && obj->attr->pcidev.bus == bus + && obj->attr->pcidev.dev == dev + && obj->attr->pcidev.func == func) + return obj; + } + return NULL; +} + +/** \brief Find the PCI device object matching the PCI bus id + * given as a string xxxx:yy:zz.t or yy:zz.t. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid) +{ + unsigned domain = 0; /* default */ + unsigned bus, dev, func; + + if (sscanf(busid, "%x:%x.%x", &bus, &dev, &func) != 3 + && sscanf(busid, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) { + errno = EINVAL; + return NULL; + } + + return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, func); +} + +/** \brief Get the next OS device in the system. + * + * \return the first OS device if \p prev is \c NULL. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev) +{ + return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, prev); +} + +/** \brief Get the next bridge in the system. + * + * \return the first bridge if \p prev is \c NULL. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev) +{ + return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_BRIDGE, prev); +} + +/* \brief Checks whether a given bridge covers a given PCI bus. + */ +static __hwloc_inline int +hwloc_bridge_covers_pcibus(hwloc_obj_t bridge, + unsigned domain, unsigned bus) +{ + return bridge->type == HWLOC_OBJ_BRIDGE + && bridge->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI + && bridge->attr->bridge.downstream.pci.domain == domain + && bridge->attr->bridge.downstream.pci.secondary_bus <= bus + && bridge->attr->bridge.downstream.pci.subordinate_bus >= bus; +} + +/** \brief Find the hostbridge that covers the given PCI bus. + * + * This is useful for finding the locality of a bus because + * it is the hostbridge parent cpuset. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_hostbridge_by_pcibus(hwloc_topology_t topology, + unsigned domain, unsigned bus) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) { + if (hwloc_bridge_covers_pcibus(obj, domain, bus)) { + /* found bridge covering this pcibus, make sure it's a hostbridge */ + assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST); + assert(obj->parent->type != HWLOC_OBJ_BRIDGE); + assert(obj->parent->cpuset); + return obj; + } + } + return NULL; +} + +/** @} */ + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_HELPER_H */ diff --git a/rocrtst/thirdparty/include/hwloc/inlines.h b/rocrtst/thirdparty/include/hwloc/inlines.h new file mode 100644 index 0000000000..7281750a87 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/inlines.h @@ -0,0 +1,154 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2013 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** + * This file contains the inline code of functions declared in hwloc.h + */ + +#ifndef HWLOC_INLINES_H +#define HWLOC_INLINES_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +static __hwloc_inline int +hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) + return depth; + + /* find the highest existing level with type order >= */ + for(depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); ; depth--) + if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) < 0) + return depth+1; + + /* Shouldn't ever happen, as there is always a SYSTEM level with lower order and known depth. */ + /* abort(); */ +} + +static __hwloc_inline int +hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) + return depth; + + /* find the lowest existing level with type order <= */ + for(depth = 0; ; depth++) + if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) > 0) + return depth-1; + + /* Shouldn't ever happen, as there is always a PU level with higher order and known depth. */ + /* abort(); */ +} + +static __hwloc_inline int +hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return 0; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return -1; /* FIXME: agregate nbobjs from different levels? */ + return hwloc_get_nbobjs_by_depth(topology, depth); +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_obj_by_depth(topology, depth, idx); +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev) +{ + if (!prev) + return hwloc_get_obj_by_depth (topology, depth, 0); + if (prev->depth != depth) + return NULL; + return prev->next_cousin; +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, + hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_by_depth (topology, depth, prev); +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_root_obj (hwloc_topology_t topology) +{ + return hwloc_get_obj_by_depth (topology, 0, 0); +} + +static __hwloc_inline const char * +hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) +{ + unsigned i; + for(i=0; iinfos_count; i++) + if (!strcmp(obj->infos[i].name, name)) + return obj->infos[i].value; + return NULL; +} + +static __hwloc_inline void * +hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + void *p = hwloc_alloc_membind_nodeset(topology, len, nodeset, policy, flags); + if (p) + return p; + hwloc_set_membind_nodeset(topology, nodeset, policy, flags); + p = hwloc_alloc(topology, len); + if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH) + /* Enforce the binding by touching the data */ + memset(p, 0, len); + return p; +} + +static __hwloc_inline void * +hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags) +{ + void *p = hwloc_alloc_membind(topology, len, set, policy, flags); + if (p) + return p; + hwloc_set_membind(topology, set, policy, flags); + p = hwloc_alloc(topology, len); + if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH) + /* Enforce the binding by touching the data */ + memset(p, 0, len); + return p; +} + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_INLINES_H */ diff --git a/rocrtst/thirdparty/include/hwloc/intel-mic.h b/rocrtst/thirdparty/include/hwloc/intel-mic.h new file mode 100644 index 0000000000..b8cf4d5985 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/intel-mic.h @@ -0,0 +1,143 @@ +/* + * Copyright © 2013-2016 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC). + * + * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to + * include this file so as to get topology information for MIC devices. + */ + +#ifndef HWLOC_INTEL_MIC_H +#define HWLOC_INTEL_MIC_H + +#include +#include +#include +#ifdef HWLOC_LINUX_SYS +#include +#include +#include +#endif + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC) + * + * This interface offers ways to retrieve topology information about + * Intel Xeon Phi (MIC) devices. + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to MIC device whose index is \p idx. + * + * Return the CPU set describing the locality of the MIC device whose index is \p idx. + * + * Topology \p topology and device index \p idx must match the local machine. + * I/O devices detection is not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_intel_mic_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + int idx __hwloc_attribute_unused, + hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX]; + DIR *sysdir = NULL; + FILE *sysfile = NULL; + struct dirent *dirent; + unsigned pcibus, pcidev, pcifunc; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/class/mic/mic%d", idx); + sysdir = opendir(path); + if (!sysdir) + return -1; + + while ((dirent = readdir(sysdir)) != NULL) { + if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) { + sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc); + sysfile = fopen(path, "r"); + if (!sysfile) { + closedir(sysdir); + return -1; + } + + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + fclose(sysfile); + break; + } + } + + closedir(sysdir); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * MIC device for the given index. + * + * Return the OS device object describing the MIC device whose index is \p idx. + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology, + unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && !strncmp("mic", osdev->name, 3) + && atoi(osdev->name + 3) == (int) idx) + return osdev; + } + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_INTEL_MIC_H */ diff --git a/rocrtst/thirdparty/include/hwloc/linux-libnuma.h b/rocrtst/thirdparty/include/hwloc/linux-libnuma.h new file mode 100644 index 0000000000..74dbfd0a96 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/linux-libnuma.h @@ -0,0 +1,355 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2010, 2012 Université Bordeaux + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Linux libnuma. + * + * Applications that use both Linux libnuma and hwloc may want to + * include this file so as to ease conversion between their respective types. +*/ + +#ifndef HWLOC_LINUX_LIBNUMA_H +#define HWLOC_LINUX_LIBNUMA_H + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_linux_libnuma_ulongs Interoperability with Linux libnuma unsigned long masks + * + * This interface helps converting between Linux libnuma unsigned long masks + * and hwloc cpusets and nodesets. + * + * It also offers a consistent behavior on non-NUMA machines + * or non-NUMA-aware kernels by assuming that the machines have a single + * NUMA node. + * + * \note Topology \p topology must match the current machine. + * + * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware. + * (when CONFIG_NUMA is not set in the kernel configuration). + * This helper and libnuma may thus not be strictly compatible in this case, + * which may be detected by checking whether numa_available() returns -1. + * + * @{ + */ + + +/** \brief Convert hwloc CPU set \p cpuset into the array of unsigned long \p mask + * + * \p mask is the array of unsigned long that will be filled. + * \p maxnode contains the maximal node number that may be stored in \p mask. + * \p maxnode will be set to the maximal node number that was found, plus one. + * + * This function may be used before calling set_mempolicy, mbind, migrate_pages + * or any other function that takes an array of unsigned long and a maximal + * node number as input parameter. + */ +static __hwloc_inline int +hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, + unsigned long *mask, unsigned long *maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + unsigned long outmaxnode = -1; + + /* round-up to the next ulong and clear all bytes */ + *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1); + memset(mask, 0, *maxnode/8); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) { + if (node->os_index >= *maxnode) + continue; + mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8)); + if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index) + outmaxnode = node->os_index; + } + + } else { + /* if no numa, libnuma assumes we have a single node */ + if (!hwloc_bitmap_iszero(cpuset)) { + mask[0] = 1; + outmaxnode = 0; + } + } + + *maxnode = outmaxnode+1; + return 0; +} + +/** \brief Convert hwloc NUMA node set \p nodeset into the array of unsigned long \p mask + * + * \p mask is the array of unsigned long that will be filled. + * \p maxnode contains the maximal node number that may be stored in \p mask. + * \p maxnode will be set to the maximal node number that was found, plus one. + * + * This function may be used before calling set_mempolicy, mbind, migrate_pages + * or any other function that takes an array of unsigned long and a maximal + * node number as input parameter. + */ +static __hwloc_inline int +hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, + unsigned long *mask, unsigned long *maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + unsigned long outmaxnode = -1; + + /* round-up to the next ulong and clear all bytes */ + *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1); + memset(mask, 0, *maxnode/8); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) { + if (node->os_index >= *maxnode) + continue; + if (!hwloc_bitmap_isset(nodeset, node->os_index)) + continue; + mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8)); + if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index) + outmaxnode = node->os_index; + } + + } else { + /* if no numa, libnuma assumes we have a single node */ + if (!hwloc_bitmap_iszero(nodeset)) { + mask[0] = 1; + outmaxnode = 0; + } + } + + *maxnode = outmaxnode+1; + return 0; +} + +/** \brief Convert the array of unsigned long \p mask into hwloc CPU set + * + * \p mask is a array of unsigned long that will be read. + * \p maxnode contains the maximal node number that may be read in \p mask. + * + * This function may be used after calling get_mempolicy or any other function + * that takes an array of unsigned long as output parameter (and possibly + * a maximal node number as input parameter). + */ +static __hwloc_inline int +hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset, + const unsigned long *mask, unsigned long maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(cpuset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (node->os_index < maxnode + && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); + } else { + /* if no numa, libnuma assumes we have a single node */ + if (mask[0] & 1) + hwloc_bitmap_copy(cpuset, hwloc_topology_get_complete_cpuset(topology)); + else + hwloc_bitmap_zero(cpuset); + } + + return 0; +} + +/** \brief Convert the array of unsigned long \p mask into hwloc NUMA node set + * + * \p mask is a array of unsigned long that will be read. + * \p maxnode contains the maximal node number that may be read in \p mask. + * + * This function may be used after calling get_mempolicy or any other function + * that takes an array of unsigned long as output parameter (and possibly + * a maximal node number as input parameter). + */ +static __hwloc_inline int +hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset, + const unsigned long *mask, unsigned long maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(nodeset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (node->os_index < maxnode + && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) + hwloc_bitmap_set(nodeset, node->os_index); + } else { + /* if no numa, libnuma assumes we have a single node */ + if (mask[0] & 1) + hwloc_bitmap_fill(nodeset); + else + hwloc_bitmap_zero(nodeset); + } + + return 0; +} + +/** @} */ + + + +/** \defgroup hwlocality_linux_libnuma_bitmask Interoperability with Linux libnuma bitmask + * + * This interface helps converting between Linux libnuma bitmasks + * and hwloc cpusets and nodesets. + * + * It also offers a consistent behavior on non-NUMA machines + * or non-NUMA-aware kernels by assuming that the machines have a single + * NUMA node. + * + * \note Topology \p topology must match the current machine. + * + * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware. + * (when CONFIG_NUMA is not set in the kernel configuration). + * This helper and libnuma may thus not be strictly compatible in this case, + * which may be detected by checking whether numa_available() returns -1. + * + * @{ + */ + + +/** \brief Convert hwloc CPU set \p cpuset into the returned libnuma bitmask + * + * The returned bitmask should later be freed with numa_bitmask_free. + * + * This function may be used before calling many numa_ functions + * that use a struct bitmask as an input parameter. + * + * \return newly allocated struct bitmask. + */ +static __hwloc_inline struct bitmask * +hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc; +static __hwloc_inline struct bitmask * +hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + struct bitmask *bitmask = numa_allocate_cpumask(); + if (!bitmask) + return NULL; + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) + if (node->memory.local_memory) + numa_bitmask_setbit(bitmask, node->os_index); + } else { + /* if no numa, libnuma assumes we have a single node */ + if (!hwloc_bitmap_iszero(cpuset)) + numa_bitmask_setbit(bitmask, 0); + } + + return bitmask; +} + +/** \brief Convert hwloc NUMA node set \p nodeset into the returned libnuma bitmask + * + * The returned bitmask should later be freed with numa_bitmask_free. + * + * This function may be used before calling many numa_ functions + * that use a struct bitmask as an input parameter. + * + * \return newly allocated struct bitmask. + */ +static __hwloc_inline struct bitmask * +hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc; +static __hwloc_inline struct bitmask * +hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + struct bitmask *bitmask = numa_allocate_cpumask(); + if (!bitmask) + return NULL; + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (hwloc_bitmap_isset(nodeset, node->os_index) && node->memory.local_memory) + numa_bitmask_setbit(bitmask, node->os_index); + } else { + /* if no numa, libnuma assumes we have a single node */ + if (!hwloc_bitmap_iszero(nodeset)) + numa_bitmask_setbit(bitmask, 0); + } + + return bitmask; +} + +/** \brief Convert libnuma bitmask \p bitmask into hwloc CPU set \p cpuset + * + * This function may be used after calling many numa_ functions + * that use a struct bitmask as an output parameter. + */ +static __hwloc_inline int +hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset, + const struct bitmask *bitmask) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(cpuset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (numa_bitmask_isbitset(bitmask, node->os_index)) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); + } else { + /* if no numa, libnuma assumes we have a single node */ + if (numa_bitmask_isbitset(bitmask, 0)) + hwloc_bitmap_copy(cpuset, hwloc_topology_get_complete_cpuset(topology)); + else + hwloc_bitmap_zero(cpuset); + } + + return 0; +} + +/** \brief Convert libnuma bitmask \p bitmask into hwloc NUMA node set \p nodeset + * + * This function may be used after calling many numa_ functions + * that use a struct bitmask as an output parameter. + */ +static __hwloc_inline int +hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset, + const struct bitmask *bitmask) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(nodeset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (numa_bitmask_isbitset(bitmask, node->os_index)) + hwloc_bitmap_set(nodeset, node->os_index); + } else { + /* if no numa, libnuma assumes we have a single node */ + if (numa_bitmask_isbitset(bitmask, 0)) + hwloc_bitmap_fill(nodeset); + else + hwloc_bitmap_zero(nodeset); + } + + return 0; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_LINUX_NUMA_H */ diff --git a/rocrtst/thirdparty/include/hwloc/linux.h b/rocrtst/thirdparty/include/hwloc/linux.h new file mode 100644 index 0000000000..3990546515 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/linux.h @@ -0,0 +1,77 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Linux. + * + * Applications that use hwloc on Linux may want to include this file + * if using some low-level Linux features. + */ + +#ifndef HWLOC_LINUX_H +#define HWLOC_LINUX_H + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_linux Linux-specific helpers + * + * This includes helpers for manipulating Linux kernel cpumap files, and hwloc + * equivalents of the Linux sched_setaffinity and sched_getaffinity system calls. + * + * @{ + */ + +/** \brief Convert a linux kernel cpumap file \p file into hwloc CPU set. + * + * Might be used when reading CPU set from sysfs attributes such as topology + * and caches for processors, or local_cpus for devices. + */ +HWLOC_DECLSPEC int hwloc_linux_parse_cpumap_file(FILE *file, hwloc_cpuset_t set); + +/** \brief Bind a thread \p tid on cpus given in cpuset \p set + * + * The behavior is exactly the same as the Linux sched_setaffinity system call, + * but uses a hwloc cpuset. + * + * \note This is equivalent to calling hwloc_set_proc_cpubind() with + * HWLOC_CPUBIND_THREAD as flags. + */ +HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set); + +/** \brief Get the current binding of thread \p tid + * + * The behavior is exactly the same as the Linux sched_getaffinity system call, + * but uses a hwloc cpuset. + * + * \note This is equivalent to calling hwloc_get_proc_cpubind() with + * ::HWLOC_CPUBIND_THREAD as flags. + */ +HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set); + +/** \brief Get the last physical CPU where thread \p tid ran. + * + * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with + * ::HWLOC_CPUBIND_THREAD as flags. + */ +HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology, pid_t tid, hwloc_bitmap_t set); + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_LINUX_H */ diff --git a/rocrtst/thirdparty/include/hwloc/myriexpress.h b/rocrtst/thirdparty/include/hwloc/myriexpress.h new file mode 100644 index 0000000000..68ff88f5ad --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/myriexpress.h @@ -0,0 +1,127 @@ +/* + * Copyright © 2010-2014 Inria. All rights reserved. + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Myrinet Express. + * + * Applications that use both hwloc and Myrinet Express verbs may want to + * include this file so as to get topology information for Myrinet hardware. + * + */ + +#ifndef HWLOC_MYRIEXPRESS_H +#define HWLOC_MYRIEXPRESS_H + +#include +#include + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_myriexpress Interoperability with Myrinet Express + * + * This interface offers ways to retrieve topology information about + * Myrinet Express hardware. + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close the MX board \p id. + * + * Return the CPU set describing the locality of the Myrinet Express + * board whose index is \p id. + * + * Topology \p topology and device \p id must match the local machine. + * I/O devices detection is not needed in the topology. + * + * The function only returns the locality of the device. + * No additional information about the device is available. + */ +static __hwloc_inline int +hwloc_mx_board_get_device_cpuset(hwloc_topology_t topology, + unsigned id, hwloc_cpuset_t set) +{ + uint32_t in, out; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + in = id; + if (mx_get_info(NULL, MX_NUMA_NODE, &in, sizeof(in), &out, sizeof(out)) != MX_SUCCESS) { + errno = EINVAL; + return -1; + } + + if (out != (uint32_t) -1) { + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL) + if (obj->os_index == out) { + hwloc_bitmap_copy(set, obj->cpuset); + goto out; + } + } + /* fallback to the full topology cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + out: + return 0; +} + +/** \brief Get the CPU set of logical processors that are physically + * close the MX endpoint \p endpoint. + * + * Return the CPU set describing the locality of the Myrinet Express + * board that runs the MX endpoint \p endpoint. + * + * Topology \p topology and device \p id must match the local machine. + * I/O devices detection is not needed in the topology. + * + * The function only returns the locality of the endpoint. + * No additional information about the endpoint or device is available. + */ +static __hwloc_inline int +hwloc_mx_endpoint_get_device_cpuset(hwloc_topology_t topology, + mx_endpoint_t endpoint, hwloc_cpuset_t set) +{ + uint64_t nid; + uint32_t nindex, eid; + mx_endpoint_addr_t eaddr; + + if (mx_get_endpoint_addr(endpoint, &eaddr) != MX_SUCCESS) { + errno = EINVAL; + return -1; + } + + if (mx_decompose_endpoint_addr(eaddr, &nid, &eid) != MX_SUCCESS) { + errno = EINVAL; + return -1; + } + + if (mx_nic_id_to_board_number(nid, &nindex) != MX_SUCCESS) { + errno = EINVAL; + return -1; + } + + return hwloc_mx_board_get_device_cpuset(topology, nindex, set); +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_MYRIEXPRESS_H */ diff --git a/rocrtst/thirdparty/include/hwloc/nvml.h b/rocrtst/thirdparty/include/hwloc/nvml.h new file mode 100644 index 0000000000..961d41a64b --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/nvml.h @@ -0,0 +1,176 @@ +/* + * Copyright © 2012-2016 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the NVIDIA Management Library. + * + * Applications that use both hwloc and the NVIDIA Management Library may want to + * include this file so as to get topology information for NVML devices. + */ + +#ifndef HWLOC_NVML_H +#define HWLOC_NVML_H + +#include +#include +#include +#ifdef HWLOC_LINUX_SYS +#include +#endif + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library + * + * This interface offers ways to retrieve topology information about + * devices managed by the NVIDIA Management Library (NVML). + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to NVML device \p device. + * + * Return the CPU set describing the locality of the NVML device \p device. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the NVML component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_nvml_get_device_osdev() + * and hwloc_nvml_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + nvmlDevice_t device, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX]; + FILE *sysfile = NULL; + nvmlReturn_t nvres; + nvmlPciInfo_t pci; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + nvres = nvmlDeviceGetPciInfo(device, &pci); + if (NVML_SUCCESS != nvres) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci.domain, pci.bus, pci.device); + sysfile = fopen(path, "r"); + if (!sysfile) + return -1; + + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + fclose(sysfile); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * NVML device whose index is \p idx. + * + * Return the OS device object describing the NVML device whose + * index is \p idx. Returns NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the NVML component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name + && !strncmp("nvml", osdev->name, 4) + && atoi(osdev->name + 4) == (int) idx) + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to NVML device \p device. + * + * Return the hwloc OS device object that describes the given + * NVML device \p device. Return NULL if there is none. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the NVML component must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_nvml_get_device_cpuset(). + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer. + */ +static __hwloc_inline hwloc_obj_t +hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device) +{ + hwloc_obj_t osdev; + nvmlReturn_t nvres; + nvmlPciInfo_t pci; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return NULL; + } + + nvres = nvmlDeviceGetPciInfo(device, &pci); + if (NVML_SUCCESS != nvres) + return NULL; + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + if (strncmp(osdev->name, "nvml", 4)) + continue; + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && pcidev->attr->pcidev.domain == pci.domain + && pcidev->attr->pcidev.bus == pci.bus + && pcidev->attr->pcidev.dev == pci.device + && pcidev->attr->pcidev.func == 0) + return osdev; + } + + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_NVML_H */ diff --git a/rocrtst/thirdparty/include/hwloc/opencl.h b/rocrtst/thirdparty/include/hwloc/opencl.h new file mode 100644 index 0000000000..7c08f41eef --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/opencl.h @@ -0,0 +1,200 @@ +/* + * Copyright © 2012-2017 Inria. All rights reserved. + * Copyright © 2013 Université Bordeaux. All right reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the OpenCL interface. + * + * Applications that use both hwloc and OpenCL may want to + * include this file so as to get topology information for OpenCL devices. + */ + +#ifndef HWLOC_OPENCL_H +#define HWLOC_OPENCL_H + +#include +#include +#include +#ifdef HWLOC_LINUX_SYS +#include +#endif + +#include +#include + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_opencl Interoperability with OpenCL + * + * This interface offers ways to retrieve topology information about + * OpenCL devices. + * + * Only the AMD OpenCL interface currently offers useful locality information + * about its devices. + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to OpenCL device \p device. + * + * Return the CPU set describing the locality of the OpenCL device \p device. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the OpenCL component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_opencl_get_device_osdev() + * and hwloc_opencl_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux with the AMD OpenCL implementation; other systems will simply + * get a full cpuset. + */ +static __hwloc_inline int +hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + cl_device_id device __hwloc_attribute_unused, + hwloc_cpuset_t set) +{ +#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD) + /* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */ +#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX]; + FILE *sysfile = NULL; + cl_device_topology_amd amdtopo; + cl_int clret; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); + if (CL_SUCCESS != clret) { + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + return 0; + } + if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + return 0; + } + + sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus", + (unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function); + sysfile = fopen(path, "r"); + if (!sysfile) + return -1; + + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + fclose(sysfile); +#else + /* Non-Linux + AMD OpenCL systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * OpenCL device for the given indexes. + * + * Return the OS device object describing the OpenCL device + * whose platform index is \p platform_index, + * and whose device index within this platform if \p device_index. + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the OpenCL component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology, + unsigned platform_index, unsigned device_index) +{ + unsigned x = (unsigned) -1, y = (unsigned) -1; + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && sscanf(osdev->name, "opencl%ud%u", &x, &y) == 2 + && platform_index == x && device_index == y) + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to OpenCL device \p device. + * + * Return the hwloc OS device object that describes the given + * OpenCL device \p device. Return NULL if there is none. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the OpenCL component must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_opencl_get_device_cpuset(). + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer. + */ +static __hwloc_inline hwloc_obj_t +hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused, + cl_device_id device __hwloc_attribute_unused) +{ +#ifdef CL_DEVICE_TOPOLOGY_AMD + hwloc_obj_t osdev; + cl_device_topology_amd amdtopo; + cl_int clret; + + clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); + if (CL_SUCCESS != clret) { + errno = EINVAL; + return NULL; + } + if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + errno = EINVAL; + return NULL; + } + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + if (strncmp(osdev->name, "opencl", 6)) + continue; + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && pcidev->attr->pcidev.domain == 0 + && pcidev->attr->pcidev.bus == amdtopo.pcie.bus + && pcidev->attr->pcidev.dev == amdtopo.pcie.device + && pcidev->attr->pcidev.func == amdtopo.pcie.function) + return osdev; + } + + return NULL; +#else + return NULL; +#endif +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_OPENCL_H */ diff --git a/rocrtst/thirdparty/include/hwloc/openfabrics-verbs.h b/rocrtst/thirdparty/include/hwloc/openfabrics-verbs.h new file mode 100644 index 0000000000..a66566fba9 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/openfabrics-verbs.h @@ -0,0 +1,157 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and OpenFabrics + * verbs. + * + * Applications that use both hwloc and OpenFabrics verbs may want to + * include this file so as to get topology information for OpenFabrics + * hardware (InfiniBand, etc). + * + */ + +#ifndef HWLOC_OPENFABRICS_VERBS_H +#define HWLOC_OPENFABRICS_VERBS_H + +#include +#include +#ifdef HWLOC_LINUX_SYS +#include +#endif + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_openfabrics Interoperability with OpenFabrics + * + * This interface offers ways to retrieve topology information about + * OpenFabrics devices (InfiniBand, Omni-Path, usNIC, etc). + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to device \p ibdev. + * + * Return the CPU set describing the locality of the OpenFabrics + * device \p ibdev (InfiniBand, etc). + * + * Topology \p topology and device \p ibdev must match the local machine. + * I/O devices detection is not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_ibv_get_device_osdev() + * and hwloc_ibv_get_device_osdev_by_name(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + struct ibv_device *ibdev, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the verbs-provided sysfs mechanism to + get the local cpus */ +#define HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX 128 + char path[HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX]; + FILE *sysfile = NULL; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/class/infiniband/%s/device/local_cpus", + ibv_get_device_name(ibdev)); + sysfile = fopen(path, "r"); + if (!sysfile) + return -1; + + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + + fclose(sysfile); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the OpenFabrics + * device named \p ibname. + * + * Return the OS device object describing the OpenFabrics device + * (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname + * (mlx5_0, hfi1_0, usnic_0, qib0, etc). + * Returns NULL if there is none. + * The name \p ibname is usually obtained from ibv_get_device_name(). + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology, + const char *ibname) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_OPENFABRICS == osdev->attr->osdev.type + && osdev->name && !strcmp(ibname, osdev->name)) + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to the OpenFabrics + * device \p ibdev. + * + * Return the OS device object describing the OpenFabrics device \p ibdev + * (InfiniBand, etc). Returns NULL if there is none. + * + * Topology \p topology and device \p ibdev must match the local machine. + * I/O devices detection must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_ibv_get_device_cpuset(). + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_ibv_get_device_osdev(hwloc_topology_t topology, + struct ibv_device *ibdev) +{ + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return NULL; + } + return hwloc_ibv_get_device_osdev_by_name(topology, ibv_get_device_name(ibdev)); +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_OPENFABRICS_VERBS_H */ diff --git a/rocrtst/thirdparty/include/hwloc/plugins.h b/rocrtst/thirdparty/include/hwloc/plugins.h new file mode 100644 index 0000000000..510157bcf5 --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/plugins.h @@ -0,0 +1,439 @@ +/* + * Copyright © 2013-2015 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#ifndef HWLOC_PLUGINS_H +#define HWLOC_PLUGINS_H + +/** \file + * \brief Public interface for building hwloc plugins. + */ + +struct hwloc_backend; + +#include +#ifdef HWLOC_INSIDE_PLUGIN +/* needed for hwloc_plugin_check_namespace() */ +#include +#endif + + + +/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components + * @{ + */ + +/** \brief Discovery component type */ +typedef enum hwloc_disc_component_type_e { + /** \brief CPU-only discovery through the OS, or generic no-OS support. + * \hideinitializer */ + HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0), + + /** \brief xml, synthetic or custom, + * platform-specific components such as bgq. + * Anything the discovers CPU and everything else. + * No misc backend is expected to complement a global component. + * \hideinitializer */ + HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1), + + /** \brief OpenCL, Cuda, etc. + * \hideinitializer */ + HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2) +} hwloc_disc_component_type_t; + +/** \brief Discovery component structure + * + * This is the major kind of components, taking care of the discovery. + * They are registered by generic components, either statically-built or as plugins. + */ +struct hwloc_disc_component { + /** \brief Discovery component type */ + hwloc_disc_component_type_t type; + + /** \brief Name. + * If this component is built as a plugin, this name does not have to match the plugin filename. + */ + const char *name; + + /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e. + * + * For a GLOBAL component, this usually includes all other types (~0). + * + * Other components only exclude types that may bring conflicting + * topology information. MISC components should likely not be excluded + * since they usually bring non-primary additional information. + */ + unsigned excludes; + + /** \brief Instantiate callback to create a backend from the component. + * Parameters data1, data2, data3 are NULL except for components + * that have special enabling routines such as hwloc_topology_set_xml(). */ + struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3); + + /** \brief Component priority. + * Used to sort topology->components, higher priority first. + * Also used to decide between two components with the same name. + * + * Usual values are + * 50 for native OS (or platform) components, + * 45 for x86, + * 40 for no-OS fallback, + * 30 for global components (xml/synthetic/custom), + * 20 for pci, + * 10 for other misc components (opencl etc.). + */ + unsigned priority; + + /** \private Used internally to list components by priority on topology->components + * (the component structure is usually read-only, + * the core copies it before using this field for queueing) + */ + struct hwloc_disc_component * next; +}; + +/** @} */ + + + + +/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends + * @{ + */ + +/** \brief Discovery backend structure + * + * A backend is the instantiation of a discovery component. + * When a component gets enabled for a topology, + * its instantiate() callback creates a backend. + * + * hwloc_backend_alloc() initializes all fields to default values + * that the component may change (except "component" and "next") + * before enabling the backend with hwloc_backend_enable(). + */ +struct hwloc_backend { + /** \private Reserved for the core, set by hwloc_backend_alloc() */ + struct hwloc_disc_component * component; + /** \private Reserved for the core, set by hwloc_backend_enable() */ + struct hwloc_topology * topology; + /** \private Reserved for the core. Set to 1 if forced through envvar, 0 otherwise. */ + int envvar_forced; + /** \private Reserved for the core. Used internally to list backends topology->backends. */ + struct hwloc_backend * next; + + /** \brief Backend flags, as an OR'ed set of ::hwloc_backend_flag_e */ + unsigned long flags; + + /** \brief Backend-specific 'is_custom' property. + * Shortcut on !strcmp(..->component->name, "custom"). + * Only the custom component should touch this. */ + int is_custom; + + /** \brief Backend-specific 'is_thissystem' property. + * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled. + * Set to -1 if the backend doesn't care (default). */ + int is_thissystem; + + /** \brief Backend private data, or NULL if none. */ + void * private_data; + /** \brief Callback for freeing the private_data. + * May be NULL. + */ + void (*disable)(struct hwloc_backend *backend); + + /** \brief Main discovery callback. + * returns > 0 if it modified the topology tree, -1 on error, 0 otherwise. + * May be NULL if type is ::HWLOC_DISC_COMPONENT_TYPE_MISC. */ + int (*discover)(struct hwloc_backend *backend); + + /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend. + * May be NULL. */ + int (*get_obj_cpuset)(struct hwloc_backend *backend, struct hwloc_backend *caller, struct hwloc_obj *obj, hwloc_bitmap_t cpuset); + + /** \brief Callback called by backends to notify this backend that a new object was added. + * returns > 0 if it modified the topology tree, 0 otherwise. + * May be NULL. */ + int (*notify_new_object)(struct hwloc_backend *backend, struct hwloc_backend *caller, struct hwloc_obj *obj); +}; + +/** \brief Backend flags */ +enum hwloc_backend_flag_e { + /** \brief Levels should be reconnected before this backend discover() is used. + * \hideinitializer */ + HWLOC_BACKEND_FLAG_NEED_LEVELS = (1UL<<0) +}; + +/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc. + * The caller will then modify whatever needed, and call hwloc_backend_enable(). + */ +HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component); + +/** \brief Enable a previously allocated and setup backend. */ +HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend); + +/** \brief Used by backends discovery callbacks to request locality information from others. + * + * Traverse the list of enabled backends until one has a + * get_obj_cpuset() method, and call it. + */ +HWLOC_DECLSPEC int hwloc_backends_get_obj_cpuset(struct hwloc_backend *caller, struct hwloc_obj *obj, hwloc_bitmap_t cpuset); + +/** \brief Used by backends discovery callbacks to notify other + * backends of new objects. + * + * Traverse the list of enabled backends (all but caller) and invoke + * their notify_new_object() method to notify them that a new object + * just got added to the topology. + * + * Currently only used for notifying of new PCI device objects. + */ +HWLOC_DECLSPEC int hwloc_backends_notify_new_object(struct hwloc_backend *caller, struct hwloc_obj *obj); + +/** @} */ + + + + +/** \defgroup hwlocality_generic_components Components and Plugins: Generic components + * @{ + */ + +/** \brief Generic component type */ +typedef enum hwloc_component_type_e { + /** \brief The data field must point to a struct hwloc_disc_component. */ + HWLOC_COMPONENT_TYPE_DISC, + + /** \brief The data field must point to a struct hwloc_xml_component. */ + HWLOC_COMPONENT_TYPE_XML +} hwloc_component_type_t; + +/** \brief Generic component structure + * + * Generic components structure, either statically listed by configure in static-components.h + * or dynamically loaded as a plugin. + */ +struct hwloc_component { + /** \brief Component ABI version, set to ::HWLOC_COMPONENT_ABI */ + unsigned abi; + + /** \brief Process-wide component initialization callback. + * + * This optional callback is called when the component is registered + * to the hwloc core (after loading the plugin). + * + * When the component is built as a plugin, this callback + * should call hwloc_check_plugin_namespace() + * and return an negative error code on error. + * + * \p flags is always 0 for now. + * + * \return 0 on success, or a negative code on error. + * + * \note If the component uses ltdl for loading its own plugins, + * it should load/unload them only in init() and finalize(), + * to avoid race conditions with hwloc's use of ltdl. + */ + int (*init)(unsigned long flags); + + /** \brief Process-wide component termination callback. + * + * This optional callback is called after unregistering the component + * from the hwloc core (before unloading the plugin). + * + * \p flags is always 0 for now. + * + * \note If the component uses ltdl for loading its own plugins, + * it should load/unload them only in init() and finalize(), + * to avoid race conditions with hwloc's use of ltdl. + */ + void (*finalize)(unsigned long flags); + + /** \brief Component type */ + hwloc_component_type_t type; + + /** \brief Component flags, unused for now */ + unsigned long flags; + + /** \brief Component data, pointing to a struct hwloc_disc_component or struct hwloc_xml_component. */ + void * data; +}; + +/** @} */ + + + + +/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components + * @{ + */ + +/** \brief Add an object to the topology. + * + * It is sorted along the tree of other objects according to the inclusion of + * cpusets, to eventually be added as a child of the smallest object including + * this object. + * + * If the cpuset is empty, the type of the object (and maybe some attributes) + * must be enough to find where to insert the object. This is especially true + * for NUMA nodes with memory and no CPUs. + * + * The given object should not have children. + * + * This shall only be called before levels are built. + * + * In case of error, hwloc_report_os_error() is called. + * + * Returns the object on success. + * Returns NULL and frees obj on error. + * Returns another object and frees obj if it was merged with an identical pre-existing object. + */ +HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj); + +/** \brief Type of error callbacks during object insertion */ +typedef void (*hwloc_report_error_t)(const char * msg, int line); +/** \brief Report an insertion error from a backend */ +HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line); +/** \brief Check whether insertion errors are hidden */ +HWLOC_DECLSPEC int hwloc_hide_errors(void); + +/** \brief Add an object to the topology and specify which error callback to use. + * + * Aside from the error callback selection, this function is identical to hwloc_insert_object_by_cpuset() + */ +HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj, hwloc_report_error_t report_error); + +/** \brief Insert an object somewhere in the topology. + * + * It is added as the last child of the given parent. + * The cpuset is completely ignored, so strange objects such as I/O devices should + * preferably be inserted with this. + * + * When used for "normal" children with cpusets (when importing from XML + * when duplicating a topology), the caller should make sure children are inserted + * in order. + * + * The given object may have children. + * + * Remember to call topology_connect() afterwards to fix handy pointers. + */ +HWLOC_DECLSPEC void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj); + +/** \brief Allocate and initialize an object of the given type and physical index */ +static __hwloc_inline struct hwloc_obj * +hwloc_alloc_setup_object(hwloc_obj_type_t type, signed os_index) +{ + struct hwloc_obj *obj = malloc(sizeof(*obj)); + memset(obj, 0, sizeof(*obj)); + obj->type = type; + obj->os_index = os_index; + obj->os_level = -1; + obj->attr = malloc(sizeof(*obj->attr)); + memset(obj->attr, 0, sizeof(*obj->attr)); + /* do not allocate the cpuset here, let the caller do it */ + return obj; +} + +/** \brief Setup object cpusets/nodesets by OR'ing its children. + * + * Used when adding an object late in the topology, after propagating sets up and down. + * The caller should use this after inserting by cpuset (which means the cpusets is already OK). + * Typical case: PCI backend adding a hostbridge parent. + */ +HWLOC_DECLSPEC int hwloc_fill_object_sets(hwloc_obj_t obj); + +/** \brief Make sure that plugins can lookup core symbols. + * + * This is a sanity check to avoid lazy-lookup failures when libhwloc + * is loaded within a plugin, and later tries to load its own plugins. + * This may fail (and abort the program) if libhwloc symbols are in a + * private namespace. + * + * \return 0 on success. + * \return -1 if the plugin cannot be successfully loaded. The caller + * plugin init() callback should return a negative error code as well. + * + * Plugins should call this function in their init() callback to avoid + * later crashes if lazy symbol resolution is used by the upper layer that + * loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY). + * + * \note The build system must define HWLOC_INSIDE_PLUGIN if and only if + * building the caller as a plugin. + * + * \note This function should remain inline so plugins can call it even + * when they cannot find libhwloc symbols. + */ +static __hwloc_inline int +hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused) +{ +#ifdef HWLOC_INSIDE_PLUGIN + lt_dlhandle handle; + void *sym; + handle = lt_dlopen(NULL); + if (!handle) + /* cannot check, assume things will work */ + return 0; + sym = lt_dlsym(handle, symbol); + lt_dlclose(handle); + if (!sym) { + static int verboseenv_checked = 0; + static int verboseenv_value = 0; + if (!verboseenv_checked) { + const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE"); + verboseenv_value = verboseenv ? atoi(verboseenv) : 0; + verboseenv_checked = 1; + } + if (verboseenv_value) + fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n", + pluginname, symbol); + return -1; + } +#endif /* HWLOC_INSIDE_PLUGIN */ + return 0; +} + +/** @} */ + + + + +/** \defgroup hwlocality_components_pci_funcs Components and Plugins: PCI functions to be used by components + * @{ + */ + +/** \brief Insert a list of PCI devices and bridges in the backend topology. + * + * Insert a list of objects (either PCI device or bridges) starting at first_obj + * (linked by next_sibling in the topology, and ending with NULL). + * Objects are placed under the right bridges, and the remaining upstream bridges + * are then inserted in the topology by calling the get_obj_cpuset() callback to + * find their locality. + */ +HWLOC_DECLSPEC int hwloc_insert_pci_device_list(struct hwloc_backend *backend, struct hwloc_obj *first_obj); + +/** \brief Return the offset of the given capability in the PCI config space buffer + * + * This function requires a 256-bytes config space. Unknown/unavailable bytes should be set to 0xff. + */ +HWLOC_DECLSPEC unsigned hwloc_pci_find_cap(const unsigned char *config, unsigned cap); + +/** \brief Fill linkspeed by reading the PCI config space where PCI_CAP_ID_EXP is at position offset. + * + * Needs 20 bytes of EXP capability block starting at offset in the config space + * for registers up to link status. + */ +HWLOC_DECLSPEC int hwloc_pci_find_linkspeed(const unsigned char *config, unsigned offset, float *linkspeed); + +/** \brief Modify the PCI device object into a bridge and fill its attribute if a bridge is found in the PCI config space. + * + * This function requires 64 bytes of common configuration header at the beginning of config. + * + * Returns -1 and destroys /p obj if bridge fields are invalid. + */ +HWLOC_DECLSPEC int hwloc_pci_prepare_bridge(hwloc_obj_t obj, const unsigned char *config); + +/** @} */ + + + + +#endif /* HWLOC_PLUGINS_H */ diff --git a/rocrtst/thirdparty/include/hwloc/rename.h b/rocrtst/thirdparty/include/hwloc/rename.h new file mode 100644 index 0000000000..908b064c1e --- /dev/null +++ b/rocrtst/thirdparty/include/hwloc/rename.h @@ -0,0 +1,705 @@ +/* + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright © 2010-2017 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#ifndef HWLOC_RENAME_H +#define HWLOC_RENAME_H + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Only enact these defines if we're actually renaming the symbols + (i.e., avoid trying to have no-op defines if we're *not* + renaming). */ + +#if HWLOC_SYM_TRANSFORM + +/* Use a preprocessor two-step in order to get the prefixing right. + Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming + things. */ + +#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b) +#define HWLOC_MUNGE_NAME2(a, b) a ## b +#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name) +#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name) + +/* Now define all the "real" names to be the prefixed names. This + allows us to use the real names throughout the code base (i.e., + "hwloc_"); the preprocessor will adjust to have the prefixed + name under the covers. */ + +/* Names from hwloc.h */ + +#define hwloc_get_api_version HWLOC_NAME(get_api_version) + +#define hwloc_topology HWLOC_NAME(topology) +#define hwloc_topology_t HWLOC_NAME(topology_t) + +#define hwloc_cpuset_t HWLOC_NAME(cpuset_t) +#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t) +#define hwloc_nodeset_t HWLOC_NAME(nodeset_t) +#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t) + +#define HWLOC_OBJ_SYSTEM HWLOC_NAME_CAPS(OBJ_SYSTEM) +#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) +#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) +#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) +#define HWLOC_OBJ_CACHE HWLOC_NAME_CAPS(OBJ_CACHE) +#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) +#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) +#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC) +#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP) +#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE) +#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE) +#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE) +#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX) +#define hwloc_obj_type_t HWLOC_NAME(obj_type_t) + +#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e) +#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t) +#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED) +#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA) +#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION) + +#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e) +#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t) +#define HWLOC_OBJ_BRIDGE_HOST HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST) +#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI) + +#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e) +#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t) +#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK) +#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU) +#define HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK) +#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS) +#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA) +#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC) + +#define hwloc_compare_types HWLOC_NAME(compare_types) + +#define hwloc_compare_types_e HWLOC_NAME(compare_types_e) +#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED) + +#define hwloc_obj_memory_s HWLOC_NAME(obj_memory_s) +#define hwloc_obj_memory_page_type_s HWLOC_NAME(obj_memory_page_type_s) + +#define hwloc_obj HWLOC_NAME(obj) +#define hwloc_obj_t HWLOC_NAME(obj_t) + +#define hwloc_distances_s HWLOC_NAME(distances_s) +#define hwloc_obj_info_s HWLOC_NAME(obj_info_s) + +#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u) +#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s) +#define hwloc_group_attr_s HWLOC_NAME(group_attr_s) +#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s) +#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s) +#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s) + +#define hwloc_topology_init HWLOC_NAME(topology_init) +#define hwloc_topology_load HWLOC_NAME(topology_load) +#define hwloc_topology_destroy HWLOC_NAME(topology_destroy) +#define hwloc_topology_dup HWLOC_NAME(topology_dup) +#define hwloc_topology_check HWLOC_NAME(topology_check) +#define hwloc_topology_ignore_type HWLOC_NAME(topology_ignore_type) +#define hwloc_topology_ignore_type_keep_structure HWLOC_NAME(topology_ignore_type_keep_structure) +#define hwloc_topology_ignore_all_keep_structure HWLOC_NAME(topology_ignore_all_keep_structure) + +#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) + +#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) +#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) +#define HWLOC_TOPOLOGY_FLAG_IO_DEVICES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_DEVICES) +#define HWLOC_TOPOLOGY_FLAG_IO_BRIDGES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_BRIDGES) +#define HWLOC_TOPOLOGY_FLAG_WHOLE_IO HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_IO) +#define HWLOC_TOPOLOGY_FLAG_ICACHES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_ICACHES) +#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) + +#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) +#define hwloc_topology_set_fsroot HWLOC_NAME(topology_set_fsroot) +#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) +#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) +#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) +#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) +#define hwloc_topology_set_custom HWLOC_NAME(topology_set_custom) +#define hwloc_topology_set_distance_matrix HWLOC_NAME(topology_set_distance_matrix) + +#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support) +#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support) +#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support) +#define hwloc_topology_support HWLOC_NAME(topology_support) +#define hwloc_topology_get_support HWLOC_NAME(topology_get_support) +#define hwloc_topology_set_userdata HWLOC_NAME(topology_set_userdata) +#define hwloc_topology_get_userdata HWLOC_NAME(topology_get_userdata) + +#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml) +#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer) +#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer) +#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback) +#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata) +#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64) +#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback) + +#define hwloc_topology_export_synthetic_flags_e HWLOC_NAME(topology_export_synthetic_flags_e) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS) +#define hwloc_topology_export_synthetic HWLOC_NAME(topology_export_synthetic) + +#define hwloc_topology_insert_misc_object_by_cpuset HWLOC_NAME(topology_insert_misc_object_by_cpuset) +#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent) + +#define hwloc_custom_insert_topology HWLOC_NAME(custom_insert_topology) +#define hwloc_custom_insert_group_object_by_parent HWLOC_NAME(custom_insert_group_object_by_parent) + +#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) +#define HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_DISTANCES) +#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) +#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) +#define hwloc_topology_restrict HWLOC_NAME(topology_restrict) + +#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth) +#define hwloc_get_type_depth HWLOC_NAME(get_type_depth) + +#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e) +#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN) +#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE) +#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE) +#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE) +#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) + +#define hwloc_get_depth_type HWLOC_NAME(get_depth_type) +#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) +#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type) + +#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) +#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags) + +#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth ) +#define hwloc_get_obj_by_type HWLOC_NAME(get_obj_by_type ) + +#define hwloc_obj_type_string HWLOC_NAME(obj_type_string ) +#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf ) +#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf ) +#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf) +#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf) + +#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name) +#define hwloc_obj_add_info HWLOC_NAME(obj_add_info) + +#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS) +#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD) +#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT) +#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND) + +#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t) + +#define hwloc_set_cpubind HWLOC_NAME(set_cpubind) +#define hwloc_get_cpubind HWLOC_NAME(get_cpubind) +#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind) +#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind) +#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind) +#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind) + +#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location) +#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location) + +#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT) +#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH) +#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND) +#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE) +#define HWLOC_MEMBIND_REPLICATE HWLOC_NAME_CAPS(MEMBIND_REPLICATE) +#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH) +#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED) + +#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t) + +#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS) +#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD) +#define HWLOC_MEMBIND_STRICT HWLOC_NAME_CAPS(MEMBIND_STRICT) +#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE) +#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND) +#define HWLOC_MEMBIND_BYNODESET HWLOC_NAME_CAPS(MEMBIND_BYNODESET) + +#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t) + +#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset) +#define hwloc_set_membind HWLOC_NAME(set_membind) +#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset) +#define hwloc_get_membind HWLOC_NAME(get_membind) +#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset) +#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind) +#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset) +#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind) +#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset) +#define hwloc_set_area_membind HWLOC_NAME(set_area_membind) +#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset) +#define hwloc_get_area_membind HWLOC_NAME(get_area_membind) +#define hwloc_get_area_memlocation HWLOC_NAME(get_area_memlocation) +#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset) +#define hwloc_alloc_membind HWLOC_NAME(alloc_membind) +#define hwloc_alloc HWLOC_NAME(alloc) +#define hwloc_free HWLOC_NAME(free) + +#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj) +#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev) +#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid) +#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring) +#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev) +#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge) +#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus) +#define hwloc_get_hostbridge_by_pcibus HWLOC_NAME(get_hostbridge_by_pcibus) + +/* hwloc/bitmap.h */ + +#define hwloc_bitmap_s HWLOC_NAME(bitmap_s) +#define hwloc_bitmap_t HWLOC_NAME(bitmap_t) +#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t) + +#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc) +#define hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full) +#define hwloc_bitmap_free HWLOC_NAME(bitmap_free) +#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup) +#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy) +#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf) +#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf) +#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf) +#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf) +#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf) +#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf) +#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf) +#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf) +#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf) +#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) +#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) +#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) + +#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) +#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) +#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) +#define hwloc_bitmap_only HWLOC_NAME(bitmap_only) +#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) +#define hwloc_bitmap_set HWLOC_NAME(bitmap_set) +#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range) +#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong) +#define hwloc_bitmap_clr HWLOC_NAME(bitmap_clr) +#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range) +#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset) +#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero) +#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull) +#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal) +#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects) +#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded) +#define hwloc_bitmap_or HWLOC_NAME(bitmap_or) +#define hwloc_bitmap_and HWLOC_NAME(bitmap_and) +#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot) +#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor) +#define hwloc_bitmap_not HWLOC_NAME(bitmap_not) +#define hwloc_bitmap_first HWLOC_NAME(bitmap_first) +#define hwloc_bitmap_last HWLOC_NAME(bitmap_last) +#define hwloc_bitmap_next HWLOC_NAME(bitmap_next) +#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify) +#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first) +#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare) +#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight) + +/* hwloc/cpuset.h -- deprecated but still available */ + +#define hwloc_cpuset_alloc HWLOC_NAME(cpuset_alloc) +#define hwloc_cpuset_free HWLOC_NAME(cpuset_free) +#define hwloc_cpuset_dup HWLOC_NAME(cpuset_dup) +#define hwloc_cpuset_copy HWLOC_NAME(cpuset_copy) +#define hwloc_cpuset_snprintf HWLOC_NAME(cpuset_snprintf) +#define hwloc_cpuset_asprintf HWLOC_NAME(cpuset_asprintf) +#define hwloc_cpuset_from_string HWLOC_NAME(cpuset_from_string) +#define hwloc_cpuset_zero HWLOC_NAME(cpuset_zero) +#define hwloc_cpuset_fill HWLOC_NAME(cpuset_fill) +#define hwloc_cpuset_from_ulong HWLOC_NAME(cpuset_from_ulong) +#define hwloc_cpuset_taskset_snprintf HWLOC_NAME(cpuset_taskset_snprintf) +#define hwloc_cpuset_taskset_asprintf HWLOC_NAME(cpuset_taskset_asprintf) +#define hwloc_cpuset_taskset_sscanf HWLOC_NAME(cpuset_taskset_sscanf) + +#define hwloc_cpuset_from_ith_ulong HWLOC_NAME(cpuset_from_ith_ulong) +#define hwloc_cpuset_to_ulong HWLOC_NAME(cpuset_to_ulong) +#define hwloc_cpuset_to_ith_ulong HWLOC_NAME(cpuset_to_ith_ulong) +#define hwloc_cpuset_cpu HWLOC_NAME(cpuset_cpu) +#define hwloc_cpuset_all_but_cpu HWLOC_NAME(cpuset_all_but_cpu) +#define hwloc_cpuset_set HWLOC_NAME(cpuset_set) +#define hwloc_cpuset_set_range HWLOC_NAME(cpuset_set_range) +#define hwloc_cpuset_set_ith_ulong HWLOC_NAME(cpuset_set_ith_ulong) +#define hwloc_cpuset_clr HWLOC_NAME(cpuset_clr) +#define hwloc_cpuset_clr_range HWLOC_NAME(cpuset_clr_range) +#define hwloc_cpuset_isset HWLOC_NAME(cpuset_isset) +#define hwloc_cpuset_iszero HWLOC_NAME(cpuset_iszero) +#define hwloc_cpuset_isfull HWLOC_NAME(cpuset_isfull) +#define hwloc_cpuset_isequal HWLOC_NAME(cpuset_isequal) +#define hwloc_cpuset_intersects HWLOC_NAME(cpuset_intersects) +#define hwloc_cpuset_isincluded HWLOC_NAME(cpuset_isincluded) +#define hwloc_cpuset_or HWLOC_NAME(cpuset_or) +#define hwloc_cpuset_and HWLOC_NAME(cpuset_and) +#define hwloc_cpuset_andnot HWLOC_NAME(cpuset_andnot) +#define hwloc_cpuset_xor HWLOC_NAME(cpuset_xor) +#define hwloc_cpuset_not HWLOC_NAME(cpuset_not) +#define hwloc_cpuset_first HWLOC_NAME(cpuset_first) +#define hwloc_cpuset_last HWLOC_NAME(cpuset_last) +#define hwloc_cpuset_next HWLOC_NAME(cpuset_next) +#define hwloc_cpuset_singlify HWLOC_NAME(cpuset_singlify) +#define hwloc_cpuset_compare_first HWLOC_NAME(cpuset_compare_first) +#define hwloc_cpuset_compare HWLOC_NAME(cpuset_compare) +#define hwloc_cpuset_weight HWLOC_NAME(cpuset_weight) + +/* hwloc/helper.h */ + +#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth) +#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth) +#define hwloc_get_root_obj HWLOC_NAME(get_root_obj) +#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth) +#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type) +#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth) +#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type) +#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index) +#define hwloc_get_numanode_obj_by_os_index HWLOC_NAME(get_numanode_obj_by_os_index) +#define hwloc_get_next_child HWLOC_NAME(get_next_child) +#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj) +#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree) +#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset) +#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset) +#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth) +#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type) +#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth) +#define hwloc_get_obj_inside_cpuset_by_type HWLOC_NAME(get_obj_inside_cpuset_by_type) +#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth) +#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type) +#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset) +#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset) +#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset) +#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth) +#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type) +#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth) +#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset) +#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj) +#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs) +#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type) +#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) +#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) +#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) +#define hwloc_distrib HWLOC_NAME(distrib) +#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy) +#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset) +#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset) +#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset) +#define hwloc_topology_get_online_cpuset HWLOC_NAME(topology_get_online_cpuset) +#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset) +#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset) +#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset) +#define hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset) +#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset) +#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict) +#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset) +#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict) +#define hwloc_get_whole_distance_matrix_by_depth HWLOC_NAME(get_whole_distance_matrix_by_depth) +#define hwloc_get_whole_distance_matrix_by_type HWLOC_NAME(get_whole_distance_matrix_by_type) +#define hwloc_get_distance_matrix_covering_obj_by_depth HWLOC_NAME(get_distance_matrix_covering_obj_by_depth) +#define hwloc_get_latency HWLOC_NAME(get_latency) + +/* diff.h */ + +#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e) +#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO) +#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u) +#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s) +#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s) +#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s) +#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e) +#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR) +#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX) +#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u) +#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t) +#define hwloc_topology_diff_generic_s HWLOC_NAME(topology_diff_generic_s) +#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s) +#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s) +#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build) +#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e) +#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE) +#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply) +#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy) +#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml) +#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml) +#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer) +#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer) + +/* glibc-sched.h */ + +#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity) +#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity) + +/* linux-libnuma.h */ + +#define hwloc_cpuset_to_linux_libnuma_ulongs HWLOC_NAME(cpuset_to_linux_libnuma_ulongs) +#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs) +#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs) +#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs) +#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask) +#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask) +#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask) +#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask) + +/* linux.h */ + +#define hwloc_linux_parse_cpumap_file HWLOC_NAME(linux_parse_cpumap_file) +#define hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind) +#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind) +#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location) + +/* openfabrics-verbs.h */ + +#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset) +#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev) +#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name) + +/* myriexpress.h */ + +#define hwloc_mx_board_get_device_cpuset HWLOC_NAME(mx_board_get_device_cpuset) +#define hwloc_mx_endpoint_get_device_cpuset HWLOC_NAME(mx_endpoint_get_device_cpuset) + +/* intel-mic.h */ + +#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset) +#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index) + +/* opencl.h */ + +#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) +#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) +#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) + +/* cuda.h */ + +#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids) +#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset) +#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev) +#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev) +#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index) + +/* cudart.h */ + +#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids) +#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset) +#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev) +#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index) + +/* nvml.h */ + +#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset) +#define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev) +#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index) + +/* gl.h */ + +#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) +#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name) +#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev) + +/* hwloc/plugins.h */ + +#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) +#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) +#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) +#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) +#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) +#define hwloc_disc_component HWLOC_NAME(disc_component) + +#define hwloc_backend HWLOC_NAME(backend) +#define hwloc_backend_flag_e HWLOC_NAME(backend_flag_e) +#define HWLOC_BACKEND_FLAG_NEED_LEVELS HWLOC_NAME_CAPS(BACKEND_FLAG_NEED_LEVELS) + +#define hwloc_backend_alloc HWLOC_NAME(backend_alloc) +#define hwloc_backend_enable HWLOC_NAME(backend_enable) +#define hwloc_backends_get_obj_cpuset HWLOC_NAME(backends_get_obj_cpuset) +#define hwloc_backends_notify_new_object HWLOC_NAME(backends_notify_new_object) + +#define hwloc_component_type_e HWLOC_NAME(component_type_e) +#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC) +#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML) +#define hwloc_component_type_t HWLOC_NAME(component_type_t) +#define hwloc_component HWLOC_NAME(component) + +#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace) + +#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset) +#define hwloc_report_error_t HWLOC_NAME(report_error_t) +#define hwloc_report_os_error HWLOC_NAME(report_os_error) +#define hwloc_hide_errors HWLOC_NAME(hide_errors) +#define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset) +#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent) +#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object) +#define hwloc_fill_object_sets HWLOC_NAME(fill_object_sets) + +#define hwloc_insert_pci_device_list HWLOC_NAME(insert_pci_device_list) +#define hwloc_pci_find_cap HWLOC_NAME(pci_find_cap) +#define hwloc_pci_find_linkspeed HWLOC_NAME(pci_find_linkspeed) +#define hwloc_pci_prepare_bridge HWLOC_NAME(pci_prepare_bridge) + +/* hwloc/deprecated.h */ + +#define hwloc_obj_type_of_string HWLOC_NAME(obj_type_of_string ) +#define hwloc_obj_snprintf HWLOC_NAME(obj_snprintf) +#define hwloc_distributev HWLOC_NAME(distributev) +#define hwloc_distribute HWLOC_NAME(distribute) + +/* private/debug.h */ + +#define hwloc_debug_enabled HWLOC_NAME(debug_enabled) +#define hwloc_debug HWLOC_NAME(debug) + +/* private/misc.h */ + +#define hwloc_snprintf HWLOC_NAME(snprintf) +#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) +#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) +#define hwloc_ffs32 HWLOC_NAME(ffs32) +#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) +#define hwloc_flsl_manual HWLOC_NAME(flsl_manual) +#define hwloc_fls32 HWLOC_NAME(fls32) +#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32) +#define hwloc_weight_long HWLOC_NAME(weight_long) +#define hwloc_strncasecmp HWLOC_NAME(strncasecmp) + +/* private/cpuid-x86.h */ + +#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid) +#define hwloc_x86_cpuid HWLOC_NAME(x86_cpuid) + +/* private/xml.h */ + +#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose) + +#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s) +#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t) +#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff) +#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s) +#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s) +#define hwloc__xml_export_state_t HWLOC_NAME(_xml_export_state_t) +#define hwloc__xml_export_object HWLOC_NAME(_xml_export_object) +#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff) + +#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks) +#define hwloc_xml_component HWLOC_NAME(xml_component) +#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register) +#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset) + +/* private/components.h */ + +#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable) +#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others) + +#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) +#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) + +#define hwloc_components_init HWLOC_NAME(components_init) +#define hwloc_components_destroy_all HWLOC_NAME(components_destroy_all) + +/* private/private.h */ + +#define hwloc_ignore_type_e HWLOC_NAME(ignore_type_e) + +#define HWLOC_IGNORE_TYPE_NEVER HWLOC_NAME_CAPS(IGNORE_TYPE_NEVER) +#define HWLOC_IGNORE_TYPE_KEEP_STRUCTURE HWLOC_NAME_CAPS(IGNORE_TYPE_KEEP_STRUCTURE) +#define HWLOC_IGNORE_TYPE_ALWAYS HWLOC_NAME_CAPS(IGNORE_TYPE_ALWAYS) + +#define hwloc_os_distances_s HWLOC_NAME(os_distances_s) + +#define hwloc_xml_imported_distances_s HWLOC_NAME(xml_imported_distances_s) + +#define hwloc_alloc_obj_cpusets HWLOC_NAME(alloc_obj_cpusets) +#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) +#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname) +#define hwloc_get_sysctl HWLOC_NAME(get_sysctl) +#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors) +#define hwloc_connect_children HWLOC_NAME(connect_children) +#define hwloc_connect_levels HWLOC_NAME(connect_levels) + +#define hwloc__object_cpusets_compare_first HWLOC_NAME(_object_cpusets_compare_first) + +#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults) +#define hwloc_topology_clear HWLOC_NAME(topology_clear) + +#define hwloc__add_info HWLOC_NAME(_add_info) +#define hwloc__find_info_slot HWLOC_NAME(_find_info_slot) +#define hwloc__move_infos HWLOC_NAME(_move_infos) +#define hwloc__free_infos HWLOC_NAME(_free_infos) + +#define hwloc_binding_hooks HWLOC_NAME(binding_hooks) +#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks) +#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks) + +#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks) +#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks) +#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks) +#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks) +#define hwloc_set_osf_hooks HWLOC_NAME(set_osf_hooks) +#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks) +#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks) +#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks) +#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks) +#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks) + +#define hwloc_look_hardwired_fujitsu_k HWLOC_NAME(look_hardwired_fujitsu_k) +#define hwloc_look_hardwired_fujitsu_fx10 HWLOC_NAME(look_hardwired_fujitsu_fx10) +#define hwloc_look_hardwired_fujitsu_fx100 HWLOC_NAME(look_hardwired_fujitsu_fx100) + +#define hwloc_add_uname_info HWLOC_NAME(add_uname_info) +#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object) +#define hwloc__duplicate_objects HWLOC_NAME(_duplicate_objects) + +#define hwloc_alloc_heap HWLOC_NAME(alloc_heap) +#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap) +#define hwloc_free_heap HWLOC_NAME(free_heap) +#define hwloc_free_mmap HWLOC_NAME(free_mmap) +#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail) + +#define hwloc_distances_init HWLOC_NAME(distances_init) +#define hwloc_distances_destroy HWLOC_NAME(distances_destroy) +#define hwloc_distances_set HWLOC_NAME(distances_set) +#define hwloc_distances_set_from_env HWLOC_NAME(distances_set_from_env) +#define hwloc_distances_restrict_os HWLOC_NAME(distances_restrict_os) +#define hwloc_distances_restrict HWLOC_NAME(distances_restrict) +#define hwloc_distances_finalize_os HWLOC_NAME(distances_finalize_os) +#define hwloc_distances_finalize_logical HWLOC_NAME(distances_finalize_logical) +#define hwloc_clear_object_distances HWLOC_NAME(clear_object_distances) +#define hwloc_clear_object_distances_one HWLOC_NAME(clear_object_distances_one) +#define hwloc_group_by_distances HWLOC_NAME(group_by_distances) + +#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64) +#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64) + +#define hwloc_obj_add_info_nodup HWLOC_NAME(obj_add_info_nodup) + +#define hwloc_progname HWLOC_NAME(progname) + +#define hwloc_bitmap_compare_inclusion HWLOC_NAME(bitmap_compare_inclusion) + +/* private/solaris-chiptype.h */ + +#define hwloc_solaris_chip_info_s HWLOC_NAME(solaris_chip_info_s) +#define hwloc_solaris_get_chip_info HWLOC_NAME(solaris_get_chip_info) + +#endif /* HWLOC_SYM_TRANSFORM */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_RENAME_H */ diff --git a/rocrtst/thirdparty/lib/LICENSE b/rocrtst/thirdparty/lib/LICENSE new file mode 100644 index 0000000000..c960c7626f --- /dev/null +++ b/rocrtst/thirdparty/lib/LICENSE @@ -0,0 +1,38 @@ +Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. +Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. +Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. +Copyright © 2004-2005 The Regents of the University of California. All rights reserved. +Copyright © 2009 CNRS +Copyright © 2009-2016 Inria. All rights reserved. +Copyright © 2009-2015 Université Bordeaux +Copyright © 2009-2015 Cisco Systems, Inc. All rights reserved. +Copyright © 2009-2012 Oracle and/or its affiliates. All rights reserved. +Copyright © 2010 IBM +Copyright © 2010 Jirka Hladky +Copyright © 2012 Aleksej Saushev, The NetBSD Foundation +Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. +Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved. +Copyright © 2015 Research Organization for Information Science and Technology (RIST). All rights reserved. +Copyright © 2015-2016 Intel, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/rocrtst/thirdparty/libhwloc.so.5 b/rocrtst/thirdparty/lib/libhwloc.so.5 similarity index 100% rename from rocrtst/thirdparty/libhwloc.so.5 rename to rocrtst/thirdparty/lib/libhwloc.so.5