diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 8c907fa885..2a7433ea75 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -1455,7 +1455,7 @@ class AMDSMIParser(argparse.ArgumentParser): reset_perf_det_help = "Disable performance determinism" reset_power_cap_help = "Reset the PPT0 and PPT1 power capacity limit to max capable" reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs on a per partition basis" - reset_gpu_driver_help = "Reset (reload) AMD GPU driver" + reset_gpu_driver_help = "Triggers a chain that resets all GPU's" # Create reset subparser reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help) diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index 4a491d4920..aa43d2e5dd 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -187,6 +187,21 @@ typedef enum { */ #define MAX_NUMBER_OF_AFIDS_PER_RECORD 12 //!< Maximum AFIDs per CPER record +/** + * @brief Maximum size definitions AMDSMI + * + * @cond @tag{gpu_bm_linux} @tag{host} @endcond + */ +#define AMDSMI_MAX_VF_COUNT 32 //!< Maximum number of virtual functions supported +#define AMDSMI_MAX_DRIVER_NUM 2 //!< Maximum number of drivers supported +#define AMDSMI_DFC_FW_NUMBER_OF_ENTRIES 9 //!< Number of DFC firmware entries supported +#define AMDSMI_MAX_WHITE_LIST_ELEMENTS 16 //!< Maximum number of white list elements for device access control +#define AMDSMI_MAX_BLACK_LIST_ELEMENTS 64 //!< Maximum number of black list elements for device access control +#define AMDSMI_MAX_UUID_ELEMENTS 16 //!< Maximum number of UUID elements supported +#define AMDSMI_MAX_TA_WHITE_LIST_ELEMENTS 8 //!< Maximum number of TA (Trusted Application) white list elements +#define AMDSMI_MAX_ERR_RECORDS 10 //!< Maximum number of error records that can be stored 
+#define AMDSMI_MAX_PROFILE_COUNT 16 //!< Maximum number of profiles supported + /** * @brief String format * @@ -215,6 +230,14 @@ typedef enum { #define AMDSMI_LIB_VERSION_EXPAND_PARTS(MAJOR_STR, MINOR_STR, RELEASE_STR) AMDSMI_LIB_VERSION_CREATE_STRING(MAJOR_STR, MINOR_STR, RELEASE_STR) #define AMDSMI_LIB_VERSION_STRING AMDSMI_LIB_VERSION_EXPAND_PARTS(AMDSMI_LIB_VERSION_MAJOR, AMDSMI_LIB_VERSION_MINOR, AMDSMI_LIB_VERSION_RELEASE) +/** + * @brief Maximum size definitions AMDSMI + * + * @cond @tag{gpu_bm_linux} @endcond + */ +#define AMDSMI_PF_INDEX (AMDSMI_MAX_VF_COUNT - 1) +#define AMDSMI_MAX_DRIVER_INFO_RSVD 64 + /** * @brief GPU Capability info * @@ -276,20 +299,17 @@ typedef struct { /** * @brief Processor types detectable by AMD SMI * - * AMDSMI_PROCESSOR_TYPE_AMD_CPU - CPU Socket is a physical component that holds the CPU. - * AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE - CPU Cores are number of individual processing units within the CPU. - * AMDSMI_PROCESSOR_TYPE_AMD_APU - Combination of AMDSMI_PROCESSOR_TYPE_AMD_CPU and integrated GPU on single die - * * @cond @tag{gpu_bm_linux} @tag{host} @tag{cpu_bm} @tag{guest_windows} @endcond */ typedef enum { - AMDSMI_PROCESSOR_TYPE_UNKNOWN = 0, //!< Unknown processor type - AMDSMI_PROCESSOR_TYPE_AMD_GPU, //!< AMD Graphics processor type - AMDSMI_PROCESSOR_TYPE_AMD_CPU, //!< AMD CPU processor type - AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU, //!< Non-AMD Graphics processor type - AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU, //!< Non-AMD CPU processor type - AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE, //!< AMD CPU-Core processor type - AMDSMI_PROCESSOR_TYPE_AMD_APU //!< AMD Accelerated processor type (GPU and CPU) + AMDSMI_PROCESSOR_TYPE_UNKNOWN = 0, //!< Unknown processor type + AMDSMI_PROCESSOR_TYPE_AMD_GPU, //!< AMD Graphics processor type + AMDSMI_PROCESSOR_TYPE_AMD_CPU, //!< AMD CPU processor type, a physical component that holds the CPU + AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU, //!< Non-AMD Graphics processor type + AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU, 
//!< Non-AMD CPU processor type + AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE, //!< AMD CPU-Core processor type, individual processing units within the CPU + AMDSMI_PROCESSOR_TYPE_AMD_APU, //!< AMD Accelerated processor type, GPU and CPU on a single die + AMDSMI_PROCESSOR_TYPE_AMD_NIC //!< AMD Network Interface Card processor type } processor_type_t; /** @@ -679,7 +699,7 @@ typedef struct { /** * @brief VRam Usage * - * @cond @tag{gpu_bm_linux} @endcond + * @cond @tag{gpu_bm_linux} @tag{guest_windows} @endcond */ typedef struct { uint32_t vram_total; //!< In MB @@ -754,7 +774,7 @@ typedef struct { /** * @brief bdf types * - * @cond @tag{gpu_bm_linux} @tag{host} @endcond + * @cond @tag{gpu_bm_linux} @tag{host} @tag{guest_windows} @endcond */ typedef union { struct bdf_ { @@ -829,7 +849,7 @@ typedef struct { /** * @brief Power Cap Information * - * @cond @tag{gpu_bm_linux} @tag{host} @endcond + * @cond @tag{gpu_bm_linux} @tag{host} @tag{guest_windows} @endcond */ typedef struct { uint64_t power_cap; //!< current power cap Units uW {@linux_bm} or W {@host} @@ -860,7 +880,7 @@ typedef struct { char build_date[AMDSMI_MAX_STRING_LENGTH]; char part_number[AMDSMI_MAX_STRING_LENGTH]; char version[AMDSMI_MAX_STRING_LENGTH]; - char boot_firmware[AMDSMI_MAX_STRING_LENGTH]; // UBL (Unified BootLoader) Version information + char boot_firmware[AMDSMI_MAX_STRING_LENGTH]; //!< UBL (Unified BootLoader) Version information uint64_t reserved[36]; } amdsmi_vbios_info_t; @@ -917,17 +937,17 @@ typedef struct { */ typedef struct { char market_name[AMDSMI_MAX_STRING_LENGTH]; - uint32_t vendor_id; //!< Use 32 bit to be compatible with other platform. + uint32_t vendor_id; //!< Use 32 bit to be compatible with other platform. 
char vendor_name[AMDSMI_MAX_STRING_LENGTH]; - uint32_t subvendor_id; //!< The subsystem vendor ID - uint64_t device_id; //!< The device ID of a GPU - uint32_t rev_id; //!< The revision ID of a GPU + uint32_t subvendor_id; //!< The subsystem vendor ID + uint64_t device_id; //!< The device ID of a GPU + uint32_t rev_id; //!< The revision ID of a GPU char asic_serial[AMDSMI_MAX_STRING_LENGTH]; - uint32_t oam_id; //!< 0xFFFFFFFF if not supported + uint32_t oam_id; //!< 0xFFFFFFFF if not supported uint32_t num_of_compute_units; //!< 0xFFFFFFFF if not supported uint64_t target_graphics_version; //!< 0xFFFFFFFFFFFFFFFF if not supported - uint32_t subsystem_id; //!> The subsystem ID - uint64_t flags; //!< Chip flags + uint32_t subsystem_id; //!< The subsystem ID + uint64_t flags; //!< Chip flags uint32_t reserved[19]; } amdsmi_asic_info_t; @@ -1046,20 +1066,33 @@ typedef struct { uint32_t cpu_util_irq; } amdsmi_cpu_util_t; +/** + * @brief Link Status + * + * @cond @tag{gpu_bm_linux} @tag{host} @endcond + */ +typedef enum { + AMDSMI_LINK_STATUS_ENABLED = 0, + AMDSMI_LINK_STATUS_DISABLED = 1, + AMDSMI_LINK_STATUS_INACTIVE = 2, + AMDSMI_LINK_STATUS_ERROR = 3 +} amdsmi_link_status_t; + /** * @brief Link Metrics * * @cond @tag{gpu_bm_linux} @tag{host} @endcond */ typedef struct { - uint32_t num_links; //!< number of links + uint32_t num_links; //!< number of links struct _links { - amdsmi_bdf_t bdf; //!< bdf of the destination gpu - uint32_t bit_rate; //!< current link speed in Gb/s - uint32_t max_bandwidth; //!< max bandwidth of the link in Gb/s - amdsmi_link_type_t link_type; //!< type of the link - uint64_t read; //!< total data received for each link in KB - uint64_t write; //!< total data transfered for each link in KB + amdsmi_bdf_t bdf; //!< bdf of the destination gpu + uint32_t bit_rate; //!< current link speed in Gb/s + uint32_t max_bandwidth; //!< max bandwidth of the link in Gb/s + amdsmi_link_type_t link_type; //!< type of the link + uint64_t read; //!< total data 
received for each link in KB + uint64_t write; //!< total data transferred for each link in KB + amdsmi_link_status_t link_status; //!< HW status of the link uint64_t reserved[1]; } links[AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK]; uint64_t reserved[7]; @@ -1107,18 +1140,18 @@ typedef struct { /** * @brief Power Information * - * Unsupported struct members are set to UINT32_MAX + * @note Unsupported struct members are set to UINT32_MAX * - * @cond @tag{gpu_bm_linux} @endcond + * @cond @tag{gpu_bm_linux} @tag{host} @tag{guest_windows} @endcond */ typedef struct { - uint64_t socket_power; //!< Socket power in W - uint32_t current_socket_power; //!< Current socket power in W, Mi 300+ Series cards - uint32_t average_socket_power; //!< Average socket power in W, Navi + Mi 200 and earlier Series cards - uint64_t gfx_voltage; //!< GFX voltage measurement in mV - uint64_t soc_voltage; //!< SOC voltage measurement in mV - uint64_t mem_voltage; //!< MEM voltage measurement in mV - uint32_t power_limit; //!< The power limit in W + uint64_t socket_power; //!< Current power usage in W {@linux_bm}, uW {@host} + uint32_t current_socket_power; //!< Current socket power in W {@linux_bm}, Linux only, Mi 300+ Series cards + uint32_t average_socket_power; //!< Average socket power in W {@linux_bm}, Linux only, Navi + Mi 200 and earlier Series cards + uint64_t gfx_voltage; //!< GFX voltage measurement in mV {@linux_bm} or V {@host} + uint64_t soc_voltage; //!< SOC voltage measurement in mV {@linux_bm} or V {@host} + uint64_t mem_voltage; //!< MEM voltage measurement in mV {@linux_bm} or V {@host} + uint32_t power_limit; //!< The power limit in W {@linux_bm}, Linux only uint64_t reserved[18]; } amdsmi_power_info_t; @@ -1541,6 +1574,31 @@ typedef enum { AMDSMI_CPER_NOTIFY_TYPE_CXL_COMPONENT = 0x49A341DF69293BC9 //!< Compute Express Link Component Error } amdsmi_cper_notify_type_t; +/** + * @brief Ras policy v4.0 + * + * @cond @tag{gpu_bm_linux} @tag{host} @endcond + */ +typedef struct { + 
uint16_t dram_non_critical_region_threshold; //!< Non-critical region UCE threshold + uint16_t dram_critical_region_threshold; //!< Critical region UCE threshold +} amdsmi_gpu_ras_policy_v4_0_t; + +/** + * @brief Ras policy info structure for storing version and different ras + * policy version structures + * + * @cond @tag{gpu_bm_linux} @tag{host} @endcond + */ +typedef struct { + uint8_t major_version; + uint8_t minor_version; + union policy_data_ { + amdsmi_gpu_ras_policy_v4_0_t v4_0; + uint64_t info[5]; //!< total size of the EEPROM that can be used by the policy is 40bytes + } policy_data; +} amdsmi_gpu_ras_policy_info_t; + /** * @brief The current ECC state * @@ -2074,7 +2132,7 @@ typedef enum { } amdsmi_reg_type_t; /** - * @brief This structure holds ras feature + * @brief This structure holds ras feature information. * * @cond @tag{gpu_bm_linux} @platform{guest_windows} @tag{host} @endcond */ @@ -2084,6 +2142,13 @@ typedef struct { uint32_t ecc_correction_schema_flag; /**< ecc_correction_schema mask. PARITY error(bit 0), Single Bit correctable (bit1), Double bit error detection (bit2), Poison (bit 3) */ + struct ras_info_ { + uint32_t dram_ecc : 1; + uint32_t sram_ecc : 1; + uint32_t poisoning : 1; + uint32_t rsvd : 29; + } ras_info; + bool needs_reboot; } amdsmi_ras_feature_t; /** @@ -2128,7 +2193,7 @@ typedef struct { * Place-holder "variant" for functions that have don't have any variants, * but do have monitors or sensors. * - * @cond @tag{gpu_bm_linux} @tag{host} @endcond + * @cond @tag{gpu_bm_linux} @tag{host} @tag{guest_windows} @endcond */ typedef enum { AMDSMI_VIRTUALIZATION_MODE_UNKNOWN = 0, //!< Unknown Virtualization Mode @@ -2176,6 +2241,7 @@ typedef struct { * Only F8 and XF32 are always supported at full performance. From the remaining * five types, only two can be supported at peak performance simultaneously. 
* + * @cond @tag{gpu_bm_linux} @tag{host} @endcond */ typedef enum { AMDSMI_PTL_DATA_FORMAT_I8 = 0x0, //!< Integer 8-bit format @@ -2497,6 +2563,23 @@ amdsmi_status_t amdsmi_shut_down(void); */ amdsmi_status_t amdsmi_get_socket_handles(uint32_t *socket_count, amdsmi_socket_handle* socket_handles); +/** + * @brief Returns the index of the given processor handle + * + * @ingroup tagProcDiscovery + * + * @platform{gpu_bm_linux} @platform{host} @platform{cpu_bm} @platform{guest_1vf} + * @platform{guest_mvf} @platform{guest_windows} + * + * @param[in] processor_handle Processor handle for which to query + * + * @param[out] processor_index Pointer to integer to store the processor index. Must be + * allocated by user. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_index_from_processor_handle(amdsmi_processor_handle processor_handle, uint32_t *processor_index); + #ifdef ENABLE_ESMI_LIB /** @@ -2573,6 +2656,37 @@ amdsmi_status_t amdsmi_get_socket_info(amdsmi_socket_handle socket_handle, size_ */ amdsmi_status_t amdsmi_get_processor_info(amdsmi_processor_handle processor_handle, size_t len, char *name); +/** + * @brief Get the list of cpu socket handles in the system. + * + * @ingroup tagProcDiscovery + * + * @platform{cpu_bm} + * + * @details Depends on AMDSMI_INIT_AMD_CPUS flag passed to ::amdsmi_init. + * The socket handles can be used to query the processor handles in that socket, which + * will be used in other APIs to get processor detail information. + * + * @param[in,out] socket_count As input, the value passed + * through this parameter is the number of ::amdsmi_cpusocket_handle that + * may be safely written to the memory pointed to by @p socket_handles. This is the + * limit on how many socket handles will be written to @p socket_handles. 
On return, @p + * socket_count will contain the number of socket handles written to @p socket_handles, + * or the number of socket handles that could have been written if enough memory had been + * provided. + * If @p socket_handles is NULL, as output, @p socket_count will contain + * how many sockets are available to read in the system. + * + * @param[in,out] socket_handles A pointer to a block of memory to which the + * ::amdsmi_cpusocket_handle values will be written. This value may be NULL. + * In this case, this function can be used to query how many sockets are + * available to read in the system. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_cpusocket_handles(uint32_t *socket_count, + amdsmi_cpusocket_handle* socket_handles); + /** * @brief Get respective processor counts from the processor handles * @@ -2601,29 +2715,36 @@ amdsmi_status_t amdsmi_get_processor_count_from_handles(amdsmi_processor_handle* uint32_t* nr_cpucores, uint32_t* nr_gpus); /** - * @brief Get processor list as per processor type + * @brief Returns a list of processor handles of the specified type in the system. * * @ingroup tagProcDiscovery * - * @platform{cpu_bm} + * @platform{host} @platform{gpu_bm_linux} @platform{cpu_bm} * * @details This function retrieves processor list as per the processor type * from the total processor handles list. * The @p list of processor_handles and processor type must be provided. * - * @param[in] socket_handle socket handle + * @note This function fills the user-provided buffer with processor handles of the given type + * (e.g., GPU, NIC). The processor handles returned are used to instantiate the rest of processor + * queries in the library. If the buffer is not large enough, the call will fail. * - * @param[in] processor_type processor type + * @param[in] socket_handle The socket to query. 
* - * @param[out] processor_handles list of processor handles as per processor type + * @param[in] processor_type The type of processor to query (see ::processor_type_t). * - * @param[out] processor_count processor count as per processor type selected + * @param[out] processor_handles Reference to list of processor handles returned by + * the library. Buffer must be allocated by user. + * + * @param[in,out] processor_count As input, the size of the provided buffer. + * As output, number of processor handles in the buffer. + * Parameter must be allocated by user. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ amdsmi_status_t amdsmi_get_processor_handles_by_type(amdsmi_socket_handle socket_handle, processor_type_t processor_type, - amdsmi_processor_handle* processor_handles, + amdsmi_processor_handle *processor_handles, uint32_t* processor_count); #endif @@ -2643,6 +2764,9 @@ amdsmi_status_t amdsmi_get_processor_handles_by_type(amdsmi_socket_handle socket * * @note Sockets are not supported on the @platform{host}. * + * @note On the @platform{host} this function currently supports only AMD GPUs. To enumerate other devices, + * such as AMD NICs, use amdsmi_get_processor_handles_by_type(). + * * The number of processor count is returned through @p processor_count * if @p processor_handles is NULL. Then the number of @p processor_count can be pass * as input to retrieval all processors on the socket to @p processor_handles. @@ -2685,7 +2809,7 @@ amdsmi_status_t amdsmi_get_processor_handles(amdsmi_socket_handle socket_handle, * is required to be OAM ID 0 otherwise the API will fail. OAM ID is sourced * from amdsmi_get_gpu_asic_info API. * - * @param[out] amdsmi_node_handle* A pointer to a block of memory where amdsmi_node_handle + * @param[out] node_handle A pointer to a block of memory where amdsmi_node_handle * will be written. 
* * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail @@ -2737,7 +2861,7 @@ amdsmi_status_t amdsmi_get_cpucore_handles(uint32_t *cores_count, * * @param[in] processor_handle a processor handle * - * @param[out] processor_type a pointer to processor_type_t to which the processor type + * @param[out] processor_type a pointer to ::processor_type_t to which the processor type * will be written. If this parameter is nullptr, this function will return * ::AMDSMI_STATUS_INVAL. * @@ -2745,6 +2869,25 @@ amdsmi_status_t amdsmi_get_cpucore_handles(uint32_t *cores_count, */ amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle, processor_type_t* processor_type); +/** + * @brief Returns the processor handle from the given processor index + * + * @ingroup tagProcDiscovery + * + * @platform{gpu_bm_linux} @platform{host} @platform{cpu_bm} @platform{guest_1vf} + * @platform{guest_mvf} @platform{guest_windows} + * + * @param[in] processor_index Function processor_index to query + * + * @note On the @platform{host} this function currently supports only AMD GPU indexes. + * + * @param[out] processor_handle Reference to the processor handle. + * Must be allocated by user. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_processor_handle_from_index(uint32_t processor_index, amdsmi_processor_handle *processor_handle); + /** * @brief Get processor handle with the matching bdf. 
* @@ -2765,7 +2908,7 @@ amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_hand amdsmi_status_t amdsmi_get_processor_handle_from_bdf(amdsmi_bdf_t bdf, amdsmi_processor_handle* processor_handle); /** - * @brief Returns BDF of the given device + * @brief Returns BDF of the given GPU device * * @ingroup tagProcDiscovery * @@ -2781,6 +2924,38 @@ amdsmi_status_t amdsmi_get_processor_handle_from_bdf(amdsmi_bdf_t bdf, amdsmi_pr amdsmi_status_t amdsmi_get_gpu_device_bdf(amdsmi_processor_handle processor_handle, amdsmi_bdf_t *bdf); +/** + * @brief Returns BDF of the given device + * + * @ingroup tagProcDiscovery + * + * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf} + * @platform{guest_windows} + * + * @param[in] processor_handle Device which to query + * + * @param[out] bdf Reference to BDF. Must be allocated by user. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_processor_bdf(amdsmi_processor_handle processor_handle, amdsmi_bdf_t *bdf); + +/** + * @brief Returns the processor handle from the given UUID + * + * @ingroup tagProcDiscovery + * + * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} + * + * @param[in] uuid Function UUID to query. + * + * @param[out] processor_handle Reference to the processor handle. + * Must be allocated by user. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_processor_handle_from_uuid(const char *uuid, amdsmi_processor_handle *processor_handle); + /** * @brief Returns the UUID of the device * @@ -2857,7 +3032,7 @@ amdsmi_status_t amdsmi_get_cpu_affinity_with_scope(amdsmi_processor_handle proce * * @ingroup tagProcDiscovery * - * @platform{gpu_bm_linux} @platform{guest_1vf} @platform{host} + * @platform{gpu_bm_linux} @platform{host} * * @details The virtualization mode is detected and returned as an enum. 
* @@ -3240,7 +3415,7 @@ amdsmi_status_t amdsmi_get_gpu_pci_replay_counter(amdsmi_processor_handle proces * ::amdsmi_frequencies_t::num_supported field of ::amdsmi_pcie_bandwidth_t will be * ignored. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -3366,10 +3541,10 @@ amdsmi_set_gpu_power_profile(amdsmi_processor_handle processor_handle, uint32_t * * @param[in] processor_handle A processor handle. * @param[out] sensor_count Pointer to a uint32_t that will be set to the number of supported sensors. - * @param[out] sensor_inds Pointer to an array of uint32_t to be filled with sensor indices. - * The array must be allocated by the caller with enough space. + * @param[out] sensor_inds Pointer to an array of uint32_t to be filled with sensor indices. + * The array must be allocated by the caller with enough space. * @param[out] sensor_types Pointer to an array of amdsmi_power_cap_type_t to be filled with sensor types. - * The array must be allocated by the caller with enough space. + * The array must be allocated by the caller with enough space. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail. */ @@ -3580,7 +3755,7 @@ amdsmi_get_gpu_bad_page_info(amdsmi_processor_handle processor_handle, uint32_t * @param[in] processor_handle a processor handle * @param[out] threshold of bad page count. 
* - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ @@ -3601,7 +3776,7 @@ amdsmi_get_gpu_bad_page_threshold(amdsmi_processor_handle processor_handle, uint * * @param[in] processor_handle a processor handle * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success * AMDSMI_STATUS_CORRUPTED_EEPROM on the device's EEPROM corruption @@ -3867,7 +4042,7 @@ amdsmi_status_t amdsmi_reset_gpu_fan(amdsmi_processor_handle processor_handle, u * An error will be returned if the specified speed is outside the allowable * range for the device. The maximum value is 255 and the minimum is 0. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -4072,13 +4247,17 @@ amdsmi_status_t amdsmi_get_clk_freq(amdsmi_processor_handle processor_handle, amdsmi_clk_type_t clk_type, amdsmi_frequencies_t *f); /** - * @brief Reset the gpu associated with the device with provided processor handle. It is not - * supported on virtual machine guest + * @brief Triggers a chain that resets all GPUs. + * It is not supported on virtual machine guest * * @ingroup tagClkPowerPerfQuery * * @platform{gpu_bm_linux} @platform{host} * + * @note After this function returns, the caller must wait a few seconds before calling + * any other AMD SMI API functions to allow the GPU reset to complete. Calling other APIs + * too soon may result in AMDSMI_STATUS_BUSY or undefined behavior. 
+ * * @details Given a processor handle @p processor_handle, this function will reset the GPU * * @param[in] processor_handle a processor handle @@ -4481,7 +4660,7 @@ amdsmi_get_gpu_power_profile_presets(amdsmi_processor_handle processor_handle, u * perf_level, this function will set the PowerPlay performance level for the * device to the value @p perf_lvl. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -4557,7 +4736,7 @@ amdsmi_status_t amdsmi_set_gpu_overdrive_level(amdsmi_processor_handle processor * All bits with indices greater than or equal to * ::amdsmi_frequencies_t::num_supported will be ignored. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -4606,7 +4785,7 @@ amdsmi_status_t amdsmi_get_soc_pstate(amdsmi_processor_handle processor_handle, * this function will set the soc pstate policy for this processor. All the processors at * the same socket will be set to the same policy. * - * @note This function requires root access + * @note This function requires admin/sudo privileges on @platform{gpu_bm_linux} * * @param[in] processor_handle a processor handle * @@ -4646,13 +4825,13 @@ amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle, * * @ingroup tagClkPowerPerfControl * - * @platform{gpu_bm_linux} @platform{guest_1vf} + * @platform{gpu_bm_linux} @platform{guest_1vf} @platform{host} * * @details Given a processor handle @p processor_handle and a dpm policy @p policy_id, * this function will set the xgmi plpd for this processor. All the processors at * the same socket will be set to the same policy. 
* - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -4697,7 +4876,7 @@ amdsmi_status_t amdsmi_get_gpu_process_isolation(amdsmi_processor_handle process * flag, this function will set the Process Isolation for this processor. The 0 is the process * isolation disabled, and the 1 is the process isolation enabled. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -4719,7 +4898,7 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process * this function will clean the local data of this processor. This can be called between * user logins to prevent information leak. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -4783,10 +4962,9 @@ amdsmi_status_t amdsmi_get_lib_version(amdsmi_version_t *version); * * @param[in,out] ec A pointer to an ::amdsmi_error_count_t to which the error * counts should be written - * If this parameter is nullptr, this function will return - * ::AMDSMI_STATUS_INVAL if the function is supported with the provided, - * arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the - * provided arguments. + * If this parameter is nullptr, this function will return ::AMDSMI_STATUS_INVAL + * if the function is supported with the provided arguments and ::AMDSMI_STATUS_NOT_SUPPORTED + * if it is not supported with the provided arguments. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ @@ -4818,10 +4996,9 @@ amdsmi_status_t amdsmi_get_gpu_ecc_count(amdsmi_processor_handle processor_handl * * @param[in,out] enabled_blocks A pointer to a uint64_t to which the enabled * blocks bits will be written. 
- * If this parameter is nullptr, this function will return - * ::AMDSMI_STATUS_INVAL if the function is supported with the provided, - * arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the - * provided arguments. + * If this parameter is nullptr, this function will return ::AMDSMI_STATUS_INVAL + * if the function is supported with the provided arguments and ::AMDSMI_STATUS_NOT_SUPPORTED + * if it is not supported with the provided arguments. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ @@ -4851,6 +5028,8 @@ amdsmi_status_t amdsmi_get_gpu_ecc_enabled(amdsmi_processor_handle processor_han amdsmi_status_t amdsmi_get_gpu_total_ecc_count(amdsmi_processor_handle processor_handle, amdsmi_error_count_t *ec); +/** @} End tagECCInfo */ + #pragma pack(push, 1) /** @@ -4904,48 +5083,6 @@ typedef struct { #pragma pack(pop) -/** - * @brief Retrieve CPER entries cached in the driver. - * - * The user will pass buffers to hold the CPER data and CPER headers. The library will - * fill the buffer based on the severity_mask user passed. It will also parse the CPER header - * and stored in the cper_hdrs array. The user can use the cper_hdrs to get the timestamp and other header information. - * A cursor is also returned to the user, which can be used to get the next set of CPER entries. - * - * If there are more data than any of the buffers user pass, the library will return AMDSMI_STATUS_MORE_DATA. - * User can call the API again with the cursor returned at previous call to get more data. - * If the buffer size is too small to even hold one entry, the library - * will return AMDSMI_STATUS_OUT_OF_RESOURCES. - * - * Even if the API returns AMDSMI_STATUS_MORE_DATA, the 2nd call may still get the entry_count == 0 as the driver - * cache may not contain the serverity user is interested in. The API should return AMDSMI_STATUS_SUCCESS in this case - * so that user can ignore that call. 
- * - * @ingroup tagECCInfo - * - * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} - * - * @param[in] processor_handle Handle to the processor for which CPER entries are to be retrieved. - * @param[in] severity_mask The severity mask of the entries to be retrieved. - * @param[in,out] cper_data Pointer to a buffer where the CPER data will be stored. User must allocate the buffer - * and set the buf_size correctly. - * @param[in,out] buf_size Pointer to a variable that specifies the size of the cper_data. - * On return, it will contain the actual size of the data written to the cper_data. - * @param[in,out] cper_hdrs Array of the parsed headers of the cper_data. The user must allocate - * the array of pointers to cper_hdr. The library will fill the array with the pointers to the parsed - * headers. The underlying data is in the cper_data buffer and only pointer is stored in this array. - * @param[in,out] entry_count Pointer to a variable that specifies the array length of the cper_hdrs user allocated. - * On return, it will contain the actual entries written to the cper_hdrs. - * @param[in,out] cursor Pointer to a variable that will contain the cursor for the next call. 
- * - * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail - */ -amdsmi_status_t -amdsmi_get_gpu_cper_entries(amdsmi_processor_handle processor_handle, uint32_t severity_mask, char *cper_data, - uint64_t *buf_size, amdsmi_cper_hdr_t** cper_hdrs, uint64_t *entry_count, uint64_t *cursor); - -/** @} End tagECCInfo */ - /*****************************************************************************/ /** @defgroup tagRasInfo RAS information * @{ @@ -4984,7 +5121,7 @@ amdsmi_status_t amdsmi_get_afids_from_cper(char* cper_buffer, uint32_t buf_size, * * @ingroup tagRasInfo * - * @platform{gpu_bm_linux} @platform{host} + * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} * * @param[in] processor_handle Device handle which to query * @@ -4995,6 +5132,92 @@ amdsmi_status_t amdsmi_get_afids_from_cper(char* cper_buffer, uint32_t buf_size, */ amdsmi_status_t amdsmi_get_gpu_ras_feature_info(amdsmi_processor_handle processor_handle, amdsmi_ras_feature_t *ras_feature); +/** + * @brief Get the RAS policy info for a device + * + * @ingroup tagRasInfo + * + * @platform{gpu_bm_linux} @platform{host} + * + * @details Given a processor handle @p processor_handle, this function will retrieve + * the RAS policy information for the device. + * + * @param[in] processor_handle PF of a processor for which to query + * + * @param[out] info RAS policy info for the device. Must be allocated by user. 
+ * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_gpu_ras_policy_info(amdsmi_processor_handle processor_handle, + amdsmi_gpu_ras_policy_info_t *info); + +/** + * @brief Get the bad page threshold for a device + * + * @ingroup tagRasInfo + * + * @platform{gpu_bm_linux} @platform{host} + * + * @details Given a processor handle @p processor_handle and a pointer to a uint32_t @p threshold, + * this function will retrieve the bad page threshold value associated + * with device @p processor_handle and store the value at location pointed to by + * @p threshold. + * + * @note This function requires admin/sudo privileges on @platform{gpu_bm_linux} + * + * @param[in] processor_handle a processor handle + * + * @param[in,out] threshold pointer to location where bad page threshold value will + * be written. + * If this parameter is nullptr, this function will return + * ::AMDSMI_STATUS_INVAL if the function is supported with the provided + * arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the + * provided arguments. + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_bad_page_threshold(amdsmi_processor_handle processor_handle, uint32_t *threshold); + +/** + * @brief Retrieve CPER entries cached in the driver. + * + * The user will pass buffers to hold the CPER data and CPER headers. The library will + * fill the buffer based on the severity_mask user passed. It will also parse the CPER header + * and store it in the cper_hdrs array. The user can use the cper_hdrs to get the timestamp and other header information. + * A cursor is also returned to the user, which can be used to get the next set of CPER entries. + * + * If there is more data than the buffers the user passes can hold, the library will return AMDSMI_STATUS_MORE_DATA. 
+ * User can call the API again with the cursor returned at previous call to get more data. + * If the buffer size is too small to even hold one entry, the library + * will return AMDSMI_STATUS_OUT_OF_RESOURCES. + * + * Even if the API returns AMDSMI_STATUS_MORE_DATA, the 2nd call may still get the entry_count == 0 as the driver + * cache may not contain the severity user is interested in. The API should return AMDSMI_STATUS_SUCCESS in this case + * so that user can ignore that call. + * + * @ingroup tagRasInfo + * + * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} + * + * @param[in] processor_handle Handle to the processor for which CPER entries are to be retrieved. + * @param[in] severity_mask The severity mask of the entries to be retrieved. + * @param[in,out] cper_data Pointer to a buffer where the CPER data will be stored. User must allocate the buffer + * and set the buf_size correctly. + * @param[in,out] buf_size Pointer to a variable that specifies the size of the cper_data. + * On return, it will contain the actual size of the data written to the cper_data. + * @param[in,out] cper_hdrs Array of the parsed headers of the cper_data. The user must allocate + * the array of pointers to cper_hdr. The library will fill the array with the pointers to the parsed + * headers. The underlying data is in the cper_data buffer and only pointer is stored in this array. + * @param[in,out] entry_count Pointer to a variable that specifies the array length of the cper_hdrs user allocated. + * On return, it will contain the actual entries written to the cper_hdrs. + * @param[in,out] cursor Pointer to a variable that will contain the cursor for the next call. 
+ * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t +amdsmi_get_gpu_cper_entries(amdsmi_processor_handle processor_handle, uint32_t severity_mask, char *cper_data, + uint64_t *buf_size, amdsmi_cper_hdr_t** cper_hdrs, uint64_t *entry_count, uint64_t *cursor); + /** @} End tagRasInfo */ /*****************************************************************************/ @@ -5043,7 +5266,8 @@ amdsmi_status_t amdsmi_get_gpu_ecc_status(amdsmi_processor_handle processor_hand * * @ingroup tagErrorQuery * - * @platform{gpu_bm_linux} @platform{host} @platform{cpu_bm} @platform{guest_1vf} @platform{guest_mvf} + * @platform{gpu_bm_linux} @platform{host} @platform{cpu_bm} + * @platform{guest_1vf} @platform{guest_mvf} @platform{guest_windows} * * @details Set the provided pointer to a const char *, @p status_string, to * a string containing a description of the provided error code @p status. @@ -5196,7 +5420,7 @@ amdsmi_gpu_counter_group_supported(amdsmi_processor_handle processor_handle, amd * with other performance event operations. The handle should be deallocated * with ::amdsmi_gpu_destroy_counter() when no longer needed. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] processor_handle a processor handle * @@ -5225,7 +5449,7 @@ amdsmi_gpu_create_counter(amdsmi_processor_handle processor_handle, amdsmi_event * @details Deallocate the performance counter object with the provided * ::amdsmi_event_handle_t @p evnt_handle * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] evnt_handle handle to event object to be deallocated * @@ -5245,7 +5469,7 @@ amdsmi_gpu_destroy_counter(amdsmi_event_handle_t evnt_handle); * @details Issue a command @p cmd on the event counter associated with the * provided handle @p evt_handle. 
* - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] evt_handle an event handle * @@ -5270,7 +5494,7 @@ amdsmi_gpu_control_counter(amdsmi_event_handle_t evt_handle, * provided handle @p evt_handle and write the value to the location pointed * to by @p value. * - * @note This function requires root access + * @note This function requires admin/sudo privileges * * @param[in] evt_handle an event handle * @@ -5788,7 +6012,7 @@ amdsmi_get_gpu_compute_partition(amdsmi_processor_handle processor_handle, * updated to. * * @retval ::AMDSMI_STATUS_SUCCESS call was successful - * @retval ::AMDSMI_STATUS_PERMISSION function requires root access + * @retval ::AMDSMI_STATUS_PERMISSION function requires admin/sudo privileges * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * @retval ::AMDSMI_STATUS_SETTING_UNAVAILABLE the provided setting is * unavailable for current device @@ -5800,6 +6024,27 @@ amdsmi_status_t amdsmi_set_gpu_compute_partition(amdsmi_processor_handle processor_handle, amdsmi_compute_partition_type_t compute_partition); +/** + * @brief Reverts a selected device's compute partition setting back to its + * boot state. + * + * @ingroup tagComputePartition + * + * @platform{gpu_bm_linux} + * + * @details Given a processor handle @p processor_handle, this function will attempt to + * revert its compute partition setting back to its boot state. 
+ * + * @param[in] processor_handle Device which to query + * + * @retval ::AMDSMI_STATUS_SUCCESS call was successful + * @retval ::AMDSMI_STATUS_PERMISSION function requires admin/sudo privileges + * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function + * @return ::amdsmi_status_t + */ +amdsmi_status_t amdsmi_reset_gpu_compute_partition(amdsmi_processor_handle processor_handle); + /** @} End tagComputePartition */ /*****************************************************************************/ @@ -5865,7 +6110,7 @@ amdsmi_get_gpu_memory_partition(amdsmi_processor_handle processor_handle, char * * define what the selected device's current mode setting should be updated to. * * @retval ::AMDSMI_STATUS_SUCCESS call was successful - * @retval ::AMDSMI_STATUS_PERMISSION function requires root access + * @retval ::AMDSMI_STATUS_PERMISSION function requires admin/sudo privileges * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function @@ -5876,6 +6121,30 @@ amdsmi_get_gpu_memory_partition(amdsmi_processor_handle processor_handle, char * amdsmi_status_t amdsmi_set_gpu_memory_partition(amdsmi_processor_handle processor_handle, amdsmi_memory_partition_type_t memory_partition); + +/** + * @brief Reverts a selected device's memory partition setting back to its + * boot state. + * + * @ingroup tagMemoryPartition + * + * @platform{gpu_bm_linux} + * + * @details Given a processor handle @p processor_handle, this function will attempt to + * revert its current memory partition setting back to its boot state. 
+ * + * @param[in] processor_handle Device which to query + * + * @retval ::AMDSMI_STATUS_SUCCESS call was successful + * @retval ::AMDSMI_STATUS_PERMISSION function requires admin/sudo privileges + * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function + * @retval ::AMDSMI_STATUS_AMDGPU_RESTART_ERR could not successfully restart + * the amdgpu driver + * @return ::amdsmi_status_t + */ +amdsmi_status_t amdsmi_reset_gpu_memory_partition(amdsmi_processor_handle processor_handle); + /** * @brief Returns current gpu memory partition capabilities * @@ -5896,8 +6165,8 @@ amdsmi_get_gpu_memory_partition_config(amdsmi_processor_handle processor_handle, /** * @brief Sets memory partition mode - * Set accelerator partition setting based on profile_index - * from amdsmi_get_gpu_accelerator_partition_profile_config + * Set memory partition setting based on memory_partition mode + * from amdsmi_get_gpu_memory_partition_config * * @ingroup tagMemoryPartition * @@ -5908,6 +6177,9 @@ amdsmi_get_gpu_memory_partition_config(amdsmi_processor_handle processor_handle, * device's memory partition setting. This function does not allow any concurrent operations. * Device must be idle and have no workloads when performing set partition operations. * + * @details On @platform{gpu_bm_linux} AMDGPU driver restart is REQUIRED to complete updating + * to the new memory partition setting. Refer to `amdsmi_gpu_driver_reload()` for more details. + * * On @platform{gpu_bm_linux} AMDGPU driver restart is REQUIRED to complete updating to * the new memory partition setting. Refer to `amdsmi_gpu_driver_reload()` for more details. * @@ -5937,8 +6209,8 @@ amdsmi_set_gpu_memory_partition_mode(amdsmi_processor_handle processor_handle, * * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf} * - * @note User must use admin/elevated privledges to run this API, or API will not be able to read resources. 
- * Otherwise, API will fill in the structure with as much information as possible. + * @note API requires admin/sudo privileges or API will not be able to read all resources + * for @platform{gpu_bm_linux} or any resources for @platform{host}. * * @param[in] processor_handle Device which to query * @@ -5956,8 +6228,8 @@ amdsmi_get_gpu_accelerator_partition_profile_config(amdsmi_processor_handle proc * * @ingroup tagAcceleratorPartition * - * @note User must use admin/elevated privledges to run this API, or API will not be able to read resources. - * Otherwise, API will fill in the structure with as much information as possible. + * @note API requires admin/sudo privileges or API will not be able to read all resources + * for @platform{gpu_bm_linux} or any resources for @platform{host}. * * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf} * @@ -5984,8 +6256,8 @@ amdsmi_get_gpu_accelerator_partition_profile(amdsmi_processor_handle processor_h * * @platform{gpu_bm_linux} @platform{host} * - * @note On @platform{gpu_bm_linux} User must use admin/elevated privledges - * to run this API, or API will not be able to read resources. + * @note API requires admin/sudo privileges or API will not be able to read all resources + * for @platform{gpu_bm_linux} or any resources for @platform{host}. 
* * @param[in] processor_handle Device which to query * @@ -6235,7 +6507,7 @@ amdsmi_get_gpu_board_info(amdsmi_processor_handle processor_handle, amdsmi_board * * @ingroup tagAsicBoardInfo * - * @platform{gpu_bm_linux} @platform{host} + * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} * * @param[in] processor_handle Device which to query * @@ -6287,6 +6559,12 @@ amdsmi_status_t amdsmi_get_pcie_info(amdsmi_processor_handle processor_handle, a amdsmi_status_t amdsmi_get_gpu_xcd_counter(amdsmi_processor_handle processor_handle, uint16_t *xcd_count); +/** @} End tagAsicBoardInfo */ + +/** @defgroup tagNodeInfo Node Information + * @{ + */ + /** * @brief Retrieves node power management (NPM) status and power limit for the specified node. * @@ -6306,7 +6584,7 @@ amdsmi_status_t amdsmi_get_gpu_xcd_counter(amdsmi_processor_handle processor_han */ amdsmi_status_t amdsmi_get_npm_info(amdsmi_node_handle node_handle, amdsmi_npm_info_t *info); -/** @} End tagAsicBoardInfo */ +/** @} End tagNodeInfo */ /*****************************************************************************/ /** @defgroup tagFWVbiosQuery Firmware & VBIOS queries @@ -6410,7 +6688,7 @@ amdsmi_get_gpu_activity(amdsmi_processor_handle processor_handle, amdsmi_engine_ * * @ingroup tagGPUMonitor * - * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} @platform{guest_1vf} + * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} * * @note amdsmi_power_info_t::socket_power metric can rarely spike above the socket power limit in some cases * @note unsupported struct members are set to UINT32_MAX @@ -6462,17 +6740,15 @@ amdsmi_status_t amdsmi_get_clock_info(amdsmi_processor_handle processor_handle, amdsmi_clk_type_t clk_type, amdsmi_clk_info_t *info); /** - * @brief Returns the VRAM usage (both total and used memory) - * in MegaBytes. + * @brief Returns the VRAM usage (both total and used memory) in MegaBytes. 
* * @ingroup tagGPUMonitor * - * @platform{gpu_bm_linux} + * @platform{gpu_bm_linux} @platform{guest_windows} * * @param[in] processor_handle Device which to query * - * @param[out] info Reference to vram information. - * Must be allocated by user. + * @param[out] info Reference to vram information. Must be allocated by user. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ @@ -6617,7 +6893,7 @@ amdsmi_get_gpu_process_list(amdsmi_processor_handle processor_handle, uint32_t * * hive to be reconfigured with the specified memory partition configuration. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success - * @return | ::AMDSMI_STATUS_NO_PERM function requires root access + * @return | ::AMDSMI_STATUS_NO_PERM function requires admin/sudo privileges * @return | ::AMDSMI_STATUS_AMDGPU_RESTART_ERR could not successfully restart * the amdgpu driver. */ @@ -6680,12 +6956,13 @@ amdsmi_status_t amdsmi_set_gpu_ptl_state(amdsmi_processor_handle processor_handl * * @platform{gpu_bm_linux} @platform{host} * - * @details This function retrieves the current PTL fromats - * for the specified processor. PTL constrains the product to never deliver more - * than a specified TOPS/second. + * @details This function retrieves the current PTL formats + * for the specified processor. PTL prevents the product from delivering more + * than a specified TOPS/second. 
If function returns 0 for both formats, + * PTL was never enabled before on that system * * @param[in] processor_handle Device which to query -* + * * @param[out] data_format1 Pointer to first preferred data format that receives peak performance * * @param[out] data_format2 Pointer to second preferred data format that receives peak performance @@ -6722,7 +6999,7 @@ amdsmi_get_gpu_ptl_formats(amdsmi_processor_handle processor_handle, * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, * ::AMDSMI_STATUS_NOT_SUPPORTED if PTL is not supported on this device, * non-zero on other failures - **/ + */ amdsmi_status_t amdsmi_set_gpu_ptl_formats(amdsmi_processor_handle processor_handle, amdsmi_ptl_data_format_t data_format1, diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py index d06a02f4a0..f9298a5f10 100644 --- a/projects/amdsmi/py-interface/amdsmi_wrapper.py +++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py @@ -218,6 +218,16 @@ except OSError as e: amdsmi_free_name_value_pairs = _libraries['libamd_smi.so'].amdsmi_free_name_value_pairs amdsmi_free_name_value_pairs.restype = None amdsmi_free_name_value_pairs.argtypes = [ctypes.POINTER(None)] +class FunctionFactoryStub: + def __getattr__(self, _): + return ctypes.CFUNCTYPE(lambda y:y) + +# libraries['FIXME_STUB'] explanation +# As you did not list (-l libraryname.so) a library that exports this function +# This is a non-working stub instead. 
+# You can either re-run clan2py with -l /path/to/library.so +# Or manually fix this by comment the ctypes.CDLL loading +_libraries['FIXME_STUB'] = FunctionFactoryStub() # ctypes.CDLL('FIXME_STUB') @@ -283,6 +293,7 @@ processor_type_t__enumvalues = { 4: 'AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU', 5: 'AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE', 6: 'AMDSMI_PROCESSOR_TYPE_AMD_APU', + 7: 'AMDSMI_PROCESSOR_TYPE_AMD_NIC', } AMDSMI_PROCESSOR_TYPE_UNKNOWN = 0 AMDSMI_PROCESSOR_TYPE_AMD_GPU = 1 @@ -291,6 +302,7 @@ AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU = 3 AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU = 4 AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE = 5 AMDSMI_PROCESSOR_TYPE_AMD_APU = 6 +AMDSMI_PROCESSOR_TYPE_AMD_NIC = 7 processor_type_t = ctypes.c_uint32 # enum # values for enumeration 'amdsmi_status_t' @@ -304,6 +316,7 @@ amdsmi_status_t__enumvalues = { 6: 'AMDSMI_STATUS_DRM_ERROR', 7: 'AMDSMI_STATUS_API_FAILED', 8: 'AMDSMI_STATUS_TIMEOUT', + 9: 'AMDSMI_STATUS_RETRY', 10: 'AMDSMI_STATUS_NO_PERM', 11: 'AMDSMI_STATUS_INTERRUPT', 12: 'AMDSMI_STATUS_IO', @@ -977,6 +990,21 @@ amdsmi_card_form_factor_t = ctypes.c_uint32 # enum class struct_amdsmi_pcie_info_t(Structure): pass +class struct_pcie_static_(Structure): + pass + +struct_pcie_static_._pack_ = 1 # source:False +struct_pcie_static_._fields_ = [ + ('max_pcie_width', ctypes.c_uint16), + ('PADDING_0', ctypes.c_ubyte * 2), + ('max_pcie_speed', ctypes.c_uint32), + ('pcie_interface_version', ctypes.c_uint32), + ('slot_type', amdsmi_card_form_factor_t), + ('max_pcie_interface_version', ctypes.c_uint32), + ('PADDING_1', ctypes.c_ubyte * 4), + ('reserved', ctypes.c_uint64 * 9), +] + class struct_pcie_metric_(Structure): pass @@ -997,21 +1025,6 @@ struct_pcie_metric_._fields_ = [ ('reserved', ctypes.c_uint64 * 12), ] -class struct_pcie_static_(Structure): - pass - -struct_pcie_static_._pack_ = 1 # source:False -struct_pcie_static_._fields_ = [ - ('max_pcie_width', ctypes.c_uint16), - ('PADDING_0', ctypes.c_ubyte * 2), - ('max_pcie_speed', ctypes.c_uint32), - 
('pcie_interface_version', ctypes.c_uint32), - ('slot_type', amdsmi_card_form_factor_t), - ('max_pcie_interface_version', ctypes.c_uint32), - ('PADDING_1', ctypes.c_ubyte * 4), - ('reserved', ctypes.c_uint64 * 9), -] - struct_amdsmi_pcie_info_t._pack_ = 1 # source:False struct_amdsmi_pcie_info_t._fields_ = [ ('pcie_static', struct_pcie_static_), @@ -1275,6 +1288,19 @@ struct_amdsmi_cpu_util_t._fields_ = [ ] amdsmi_cpu_util_t = struct_amdsmi_cpu_util_t + +# values for enumeration 'amdsmi_link_status_t' +amdsmi_link_status_t__enumvalues = { + 0: 'AMDSMI_LINK_STATUS_ENABLED', + 1: 'AMDSMI_LINK_STATUS_DISABLED', + 2: 'AMDSMI_LINK_STATUS_INACTIVE', + 3: 'AMDSMI_LINK_STATUS_ERROR', +} +AMDSMI_LINK_STATUS_ENABLED = 0 +AMDSMI_LINK_STATUS_DISABLED = 1 +AMDSMI_LINK_STATUS_INACTIVE = 2 +AMDSMI_LINK_STATUS_ERROR = 3 +amdsmi_link_status_t = ctypes.c_uint32 # enum class struct_amdsmi_link_metrics_t(Structure): pass @@ -1290,6 +1316,8 @@ struct__links._fields_ = [ ('PADDING_0', ctypes.c_ubyte * 4), ('read', ctypes.c_uint64), ('write', ctypes.c_uint64), + ('link_status', amdsmi_link_status_t), + ('PADDING_1', ctypes.c_ubyte * 4), ('reserved', ctypes.c_uint64 * 1), ] @@ -1801,6 +1829,37 @@ AMDSMI_CPER_NOTIFY_TYPE_SEI = 5658685719731260545 AMDSMI_CPER_NOTIFY_TYPE_PEI = 4761520883332928940 AMDSMI_CPER_NOTIFY_TYPE_CXL_COMPONENT = 5306157213770398665 amdsmi_cper_notify_type_t = ctypes.c_uint64 # enum +class struct_amdsmi_gpu_ras_policy_v4_0_t(Structure): + pass + +struct_amdsmi_gpu_ras_policy_v4_0_t._pack_ = 1 # source:False +struct_amdsmi_gpu_ras_policy_v4_0_t._fields_ = [ + ('dram_non_critical_region_threshold', ctypes.c_uint16), + ('dram_critical_region_threshold', ctypes.c_uint16), +] + +amdsmi_gpu_ras_policy_v4_0_t = struct_amdsmi_gpu_ras_policy_v4_0_t +class struct_amdsmi_gpu_ras_policy_info_t(Structure): + pass + +class union_policy_data_(Union): + pass + +union_policy_data_._pack_ = 1 # source:False +union_policy_data_._fields_ = [ + ('v4_0', amdsmi_gpu_ras_policy_v4_0_t), + 
('info', ctypes.c_uint64 * 5), +] + +struct_amdsmi_gpu_ras_policy_info_t._pack_ = 1 # source:False +struct_amdsmi_gpu_ras_policy_info_t._fields_ = [ + ('major_version', ctypes.c_ubyte), + ('minor_version', ctypes.c_ubyte), + ('PADDING_0', ctypes.c_ubyte * 6), + ('policy_data', union_policy_data_), +] + +amdsmi_gpu_ras_policy_info_t = struct_amdsmi_gpu_ras_policy_info_t # values for enumeration 'amdsmi_ras_err_state_t' amdsmi_ras_err_state_t__enumvalues = { @@ -2193,10 +2252,24 @@ amdsmi_reg_type_t = ctypes.c_uint32 # enum class struct_amdsmi_ras_feature_t(Structure): pass +class struct_ras_info_(Structure): + pass + +struct_ras_info_._pack_ = 1 # source:False +struct_ras_info_._fields_ = [ + ('dram_ecc', ctypes.c_uint32, 1), + ('sram_ecc', ctypes.c_uint32, 1), + ('poisoning', ctypes.c_uint32, 1), + ('rsvd', ctypes.c_uint32, 29), +] + struct_amdsmi_ras_feature_t._pack_ = 1 # source:False struct_amdsmi_ras_feature_t._fields_ = [ ('ras_eeprom_version', ctypes.c_uint32), ('ecc_correction_schema_flag', ctypes.c_uint32), + ('ras_info', struct_ras_info_), + ('needs_reboot', ctypes.c_bool), + ('PADDING_0', ctypes.c_ubyte * 3), ] amdsmi_ras_feature_t = struct_amdsmi_ras_feature_t @@ -2283,6 +2356,7 @@ struct_amdsmi_npm_info_t._fields_ = [ ] amdsmi_npm_info_t = struct_amdsmi_npm_info_t + # values for enumeration 'amdsmi_ptl_data_format_t' amdsmi_ptl_data_format_t__enumvalues = { 0: 'AMDSMI_PTL_DATA_FORMAT_I8', @@ -2510,6 +2584,9 @@ amdsmi_shut_down.argtypes = [] amdsmi_get_socket_handles = _libraries['libamd_smi.so'].amdsmi_get_socket_handles amdsmi_get_socket_handles.restype = amdsmi_status_t amdsmi_get_socket_handles.argtypes = [ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.POINTER(None))] +amdsmi_get_index_from_processor_handle = _libraries['FIXME_STUB'].amdsmi_get_index_from_processor_handle +amdsmi_get_index_from_processor_handle.restype = amdsmi_status_t +amdsmi_get_index_from_processor_handle.argtypes = [amdsmi_processor_handle, 
ctypes.POINTER(ctypes.c_uint32)] amdsmi_get_cpu_handles = _libraries['libamd_smi.so'].amdsmi_get_cpu_handles amdsmi_get_cpu_handles.restype = amdsmi_status_t amdsmi_get_cpu_handles.argtypes = [ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.POINTER(None))] @@ -2520,6 +2597,9 @@ amdsmi_get_socket_info.argtypes = [amdsmi_socket_handle, size_t, ctypes.POINTER( amdsmi_get_processor_info = _libraries['libamd_smi.so'].amdsmi_get_processor_info amdsmi_get_processor_info.restype = amdsmi_status_t amdsmi_get_processor_info.argtypes = [amdsmi_processor_handle, size_t, ctypes.POINTER(ctypes.c_char)] +amdsmi_get_cpusocket_handles = _libraries['FIXME_STUB'].amdsmi_get_cpusocket_handles +amdsmi_get_cpusocket_handles.restype = amdsmi_status_t +amdsmi_get_cpusocket_handles.argtypes = [ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.POINTER(None))] amdsmi_get_processor_count_from_handles = _libraries['libamd_smi.so'].amdsmi_get_processor_count_from_handles amdsmi_get_processor_count_from_handles.restype = amdsmi_status_t amdsmi_get_processor_count_from_handles.argtypes = [ctypes.POINTER(ctypes.POINTER(None)), ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_uint32)] @@ -2538,19 +2618,28 @@ amdsmi_get_cpucore_handles.argtypes = [ctypes.POINTER(ctypes.c_uint32), ctypes.P amdsmi_get_processor_type = _libraries['libamd_smi.so'].amdsmi_get_processor_type amdsmi_get_processor_type.restype = amdsmi_status_t amdsmi_get_processor_type.argtypes = [amdsmi_processor_handle, ctypes.POINTER(processor_type_t)] +uint32_t = ctypes.c_uint32 +amdsmi_get_processor_handle_from_index = _libraries['FIXME_STUB'].amdsmi_get_processor_handle_from_index +amdsmi_get_processor_handle_from_index.restype = amdsmi_status_t +amdsmi_get_processor_handle_from_index.argtypes = [uint32_t, ctypes.POINTER(ctypes.POINTER(None))] amdsmi_get_processor_handle_from_bdf = _libraries['libamd_smi.so'].amdsmi_get_processor_handle_from_bdf 
amdsmi_get_processor_handle_from_bdf.restype = amdsmi_status_t amdsmi_get_processor_handle_from_bdf.argtypes = [amdsmi_bdf_t, ctypes.POINTER(ctypes.POINTER(None))] amdsmi_get_gpu_device_bdf = _libraries['libamd_smi.so'].amdsmi_get_gpu_device_bdf amdsmi_get_gpu_device_bdf.restype = amdsmi_status_t amdsmi_get_gpu_device_bdf.argtypes = [amdsmi_processor_handle, ctypes.POINTER(union_amdsmi_bdf_t)] +amdsmi_get_processor_bdf = _libraries['FIXME_STUB'].amdsmi_get_processor_bdf +amdsmi_get_processor_bdf.restype = amdsmi_status_t +amdsmi_get_processor_bdf.argtypes = [amdsmi_processor_handle, ctypes.POINTER(union_amdsmi_bdf_t)] +amdsmi_get_processor_handle_from_uuid = _libraries['FIXME_STUB'].amdsmi_get_processor_handle_from_uuid +amdsmi_get_processor_handle_from_uuid.restype = amdsmi_status_t +amdsmi_get_processor_handle_from_uuid.argtypes = [ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.POINTER(None))] amdsmi_get_gpu_device_uuid = _libraries['libamd_smi.so'].amdsmi_get_gpu_device_uuid amdsmi_get_gpu_device_uuid.restype = amdsmi_status_t amdsmi_get_gpu_device_uuid.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_char)] amdsmi_get_gpu_enumeration_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_enumeration_info amdsmi_get_gpu_enumeration_info.restype = amdsmi_status_t amdsmi_get_gpu_enumeration_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_enumeration_info_t)] -uint32_t = ctypes.c_uint32 amdsmi_get_cpu_affinity_with_scope = _libraries['libamd_smi.so'].amdsmi_get_cpu_affinity_with_scope amdsmi_get_cpu_affinity_with_scope.restype = amdsmi_status_t amdsmi_get_cpu_affinity_with_scope.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.POINTER(ctypes.c_uint64), amdsmi_affinity_scope_t] @@ -2838,15 +2927,21 @@ struct_amdsmi_cper_hdr_t._fields_ = [ ] amdsmi_cper_hdr_t = struct_amdsmi_cper_hdr_t -amdsmi_get_gpu_cper_entries = _libraries['libamd_smi.so'].amdsmi_get_gpu_cper_entries 
-amdsmi_get_gpu_cper_entries.restype = amdsmi_status_t -amdsmi_get_gpu_cper_entries.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.POINTER(struct_amdsmi_cper_hdr_t)), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.c_uint64)] amdsmi_get_afids_from_cper = _libraries['libamd_smi.so'].amdsmi_get_afids_from_cper amdsmi_get_afids_from_cper.restype = amdsmi_status_t amdsmi_get_afids_from_cper.argtypes = [ctypes.POINTER(ctypes.c_char), uint32_t, ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.c_uint32)] amdsmi_get_gpu_ras_feature_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_ras_feature_info amdsmi_get_gpu_ras_feature_info.restype = amdsmi_status_t amdsmi_get_gpu_ras_feature_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_ras_feature_t)] +amdsmi_get_gpu_ras_policy_info = _libraries['FIXME_STUB'].amdsmi_get_gpu_ras_policy_info +amdsmi_get_gpu_ras_policy_info.restype = amdsmi_status_t +amdsmi_get_gpu_ras_policy_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_gpu_ras_policy_info_t)] +amdsmi_get_bad_page_threshold = _libraries['FIXME_STUB'].amdsmi_get_bad_page_threshold +amdsmi_get_bad_page_threshold.restype = amdsmi_status_t +amdsmi_get_bad_page_threshold.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32)] +amdsmi_get_gpu_cper_entries = _libraries['libamd_smi.so'].amdsmi_get_gpu_cper_entries +amdsmi_get_gpu_cper_entries.restype = amdsmi_status_t +amdsmi_get_gpu_cper_entries.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.POINTER(struct_amdsmi_cper_hdr_t)), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.c_uint64)] amdsmi_get_gpu_ecc_status = _libraries['libamd_smi.so'].amdsmi_get_gpu_ecc_status amdsmi_get_gpu_ecc_status.restype = amdsmi_status_t amdsmi_get_gpu_ecc_status.argtypes = [amdsmi_processor_handle, 
amdsmi_gpu_block_t, ctypes.POINTER(amdsmi_ras_err_state_t)] @@ -2922,12 +3017,18 @@ amdsmi_get_gpu_compute_partition.argtypes = [amdsmi_processor_handle, ctypes.POI amdsmi_set_gpu_compute_partition = _libraries['libamd_smi.so'].amdsmi_set_gpu_compute_partition amdsmi_set_gpu_compute_partition.restype = amdsmi_status_t amdsmi_set_gpu_compute_partition.argtypes = [amdsmi_processor_handle, amdsmi_compute_partition_type_t] +amdsmi_reset_gpu_compute_partition = _libraries['FIXME_STUB'].amdsmi_reset_gpu_compute_partition +amdsmi_reset_gpu_compute_partition.restype = amdsmi_status_t +amdsmi_reset_gpu_compute_partition.argtypes = [amdsmi_processor_handle] amdsmi_get_gpu_memory_partition = _libraries['libamd_smi.so'].amdsmi_get_gpu_memory_partition amdsmi_get_gpu_memory_partition.restype = amdsmi_status_t amdsmi_get_gpu_memory_partition.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_char), uint32_t] amdsmi_set_gpu_memory_partition = _libraries['libamd_smi.so'].amdsmi_set_gpu_memory_partition amdsmi_set_gpu_memory_partition.restype = amdsmi_status_t amdsmi_set_gpu_memory_partition.argtypes = [amdsmi_processor_handle, amdsmi_memory_partition_type_t] +amdsmi_reset_gpu_memory_partition = _libraries['FIXME_STUB'].amdsmi_reset_gpu_memory_partition +amdsmi_reset_gpu_memory_partition.restype = amdsmi_status_t +amdsmi_reset_gpu_memory_partition.argtypes = [amdsmi_processor_handle] amdsmi_get_gpu_memory_partition_config = _libraries['libamd_smi.so'].amdsmi_get_gpu_memory_partition_config amdsmi_get_gpu_memory_partition_config.restype = amdsmi_status_t amdsmi_get_gpu_memory_partition_config.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_memory_partition_config_t)] @@ -3277,7 +3378,9 @@ __all__ = \ 'AMDSMI_GPU_BLOCK_XGMI_WAFL', 'AMDSMI_INIT_ALL_PROCESSORS', 'AMDSMI_INIT_AMD_APUS', 'AMDSMI_INIT_AMD_CPUS', 'AMDSMI_INIT_AMD_GPUS', 'AMDSMI_INIT_NON_AMD_CPUS', - 'AMDSMI_INIT_NON_AMD_GPUS', 'AMDSMI_LINK_TYPE_INTERNAL', + 'AMDSMI_INIT_NON_AMD_GPUS', 
'AMDSMI_LINK_STATUS_DISABLED', + 'AMDSMI_LINK_STATUS_ENABLED', 'AMDSMI_LINK_STATUS_ERROR', + 'AMDSMI_LINK_STATUS_INACTIVE', 'AMDSMI_LINK_TYPE_INTERNAL', 'AMDSMI_LINK_TYPE_NOT_APPLICABLE', 'AMDSMI_LINK_TYPE_PCIE', 'AMDSMI_LINK_TYPE_UNKNOWN', 'AMDSMI_LINK_TYPE_XGMI', 'AMDSMI_MEMORY_PARTITION_NPS1', 'AMDSMI_MEMORY_PARTITION_NPS2', @@ -3294,7 +3397,7 @@ __all__ = \ 'AMDSMI_POWER_CAP_TYPE_PPT1', 'AMDSMI_PROCESSOR_TYPE_AMD_APU', 'AMDSMI_PROCESSOR_TYPE_AMD_CPU', 'AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE', - 'AMDSMI_PROCESSOR_TYPE_AMD_GPU', + 'AMDSMI_PROCESSOR_TYPE_AMD_GPU', 'AMDSMI_PROCESSOR_TYPE_AMD_NIC', 'AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU', 'AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU', 'AMDSMI_PROCESSOR_TYPE_UNKNOWN', 'AMDSMI_PTL_DATA_FORMAT_BF16', @@ -3336,7 +3439,7 @@ __all__ = \ 'AMDSMI_STATUS_NO_HSMP_SUP', 'AMDSMI_STATUS_NO_MSR_DRV', 'AMDSMI_STATUS_NO_PERM', 'AMDSMI_STATUS_NO_SLOT', 'AMDSMI_STATUS_OUT_OF_RESOURCES', - 'AMDSMI_STATUS_REFCOUNT_OVERFLOW', + 'AMDSMI_STATUS_REFCOUNT_OVERFLOW', 'AMDSMI_STATUS_RETRY', 'AMDSMI_STATUS_SETTING_UNAVAILABLE', 'AMDSMI_STATUS_SUCCESS', 'AMDSMI_STATUS_TIMEOUT', 'AMDSMI_STATUS_UNEXPECTED_DATA', 'AMDSMI_STATUS_UNEXPECTED_SIZE', 'AMDSMI_STATUS_UNKNOWN_ERROR', @@ -3457,9 +3560,9 @@ __all__ = \ 'amdsmi_freq_volt_region_t', 'amdsmi_frequencies_t', 'amdsmi_frequency_range_t', 'amdsmi_fw_block_t', 'amdsmi_fw_info_t', 'amdsmi_get_afids_from_cper', - 'amdsmi_get_clk_freq', 'amdsmi_get_clock_info', - 'amdsmi_get_cpu_affinity_with_scope', 'amdsmi_get_cpu_cclk_limit', - 'amdsmi_get_cpu_core_boostlimit', + 'amdsmi_get_bad_page_threshold', 'amdsmi_get_clk_freq', + 'amdsmi_get_clock_info', 'amdsmi_get_cpu_affinity_with_scope', + 'amdsmi_get_cpu_cclk_limit', 'amdsmi_get_cpu_core_boostlimit', 'amdsmi_get_cpu_core_current_freq_limit', 'amdsmi_get_cpu_core_energy', 'amdsmi_get_cpu_cores_per_socket', 'amdsmi_get_cpu_current_io_bandwidth', @@ -3482,8 +3585,8 @@ __all__ = \ 'amdsmi_get_cpu_socket_power', 'amdsmi_get_cpu_socket_power_cap', 
'amdsmi_get_cpu_socket_power_cap_max', 'amdsmi_get_cpu_socket_temperature', 'amdsmi_get_cpucore_handles', - 'amdsmi_get_energy_count', 'amdsmi_get_esmi_err_msg', - 'amdsmi_get_fw_info', + 'amdsmi_get_cpusocket_handles', 'amdsmi_get_energy_count', + 'amdsmi_get_esmi_err_msg', 'amdsmi_get_fw_info', 'amdsmi_get_gpu_accelerator_partition_profile', 'amdsmi_get_gpu_accelerator_partition_profile_config', 'amdsmi_get_gpu_activity', 'amdsmi_get_gpu_asic_info', @@ -3521,8 +3624,9 @@ __all__ = \ 'amdsmi_get_gpu_ptl_formats', 'amdsmi_get_gpu_ptl_state', 'amdsmi_get_gpu_ras_block_features_enabled', 'amdsmi_get_gpu_ras_feature_info', - 'amdsmi_get_gpu_reg_table_info', 'amdsmi_get_gpu_revision', - 'amdsmi_get_gpu_subsystem_id', 'amdsmi_get_gpu_subsystem_name', + 'amdsmi_get_gpu_ras_policy_info', 'amdsmi_get_gpu_reg_table_info', + 'amdsmi_get_gpu_revision', 'amdsmi_get_gpu_subsystem_id', + 'amdsmi_get_gpu_subsystem_name', 'amdsmi_get_gpu_topo_numa_affinity', 'amdsmi_get_gpu_total_ecc_count', 'amdsmi_get_gpu_vbios_info', 'amdsmi_get_gpu_vendor_name', @@ -3531,14 +3635,18 @@ __all__ = \ 'amdsmi_get_gpu_vram_usage', 'amdsmi_get_gpu_vram_vendor', 'amdsmi_get_gpu_xcd_counter', 'amdsmi_get_gpu_xgmi_link_status', 'amdsmi_get_hsmp_metrics_table', - 'amdsmi_get_hsmp_metrics_table_version', 'amdsmi_get_lib_version', - 'amdsmi_get_link_metrics', 'amdsmi_get_link_topology_nearest', + 'amdsmi_get_hsmp_metrics_table_version', + 'amdsmi_get_index_from_processor_handle', + 'amdsmi_get_lib_version', 'amdsmi_get_link_metrics', + 'amdsmi_get_link_topology_nearest', 'amdsmi_get_minmax_bandwidth_between_processors', 'amdsmi_get_node_handle', 'amdsmi_get_npm_info', 'amdsmi_get_pcie_info', 'amdsmi_get_power_cap_info', - 'amdsmi_get_power_info', + 'amdsmi_get_power_info', 'amdsmi_get_processor_bdf', 'amdsmi_get_processor_count_from_handles', 'amdsmi_get_processor_handle_from_bdf', + 'amdsmi_get_processor_handle_from_index', + 'amdsmi_get_processor_handle_from_uuid', 'amdsmi_get_processor_handles', 
'amdsmi_get_processor_handles_by_type', 'amdsmi_get_processor_info', 'amdsmi_get_processor_type', @@ -3551,7 +3659,8 @@ __all__ = \ 'amdsmi_gpu_control_counter', 'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter', 'amdsmi_gpu_destroy_counter', 'amdsmi_gpu_driver_reload', - 'amdsmi_gpu_metrics_t', 'amdsmi_gpu_read_counter', + 'amdsmi_gpu_metrics_t', 'amdsmi_gpu_ras_policy_info_t', + 'amdsmi_gpu_ras_policy_v4_0_t', 'amdsmi_gpu_read_counter', 'amdsmi_gpu_validate_ras_eeprom', 'amdsmi_gpu_xcp_metrics_t', 'amdsmi_gpu_xgmi_error_status', 'amdsmi_hsmp_driver_version_t', 'amdsmi_hsmp_freqlimit_src_names', 'amdsmi_hsmp_metrics_table_t', @@ -3560,8 +3669,8 @@ __all__ = \ 'amdsmi_is_P2P_accessible', 'amdsmi_is_gpu_power_management_enabled', 'amdsmi_kfd_info_t', 'amdsmi_link_id_bw_type_t', 'amdsmi_link_metrics_t', - 'amdsmi_link_type_t', 'amdsmi_memory_page_status_t', - 'amdsmi_memory_partition_config_t', + 'amdsmi_link_status_t', 'amdsmi_link_type_t', + 'amdsmi_memory_page_status_t', 'amdsmi_memory_partition_config_t', 'amdsmi_memory_partition_type_t', 'amdsmi_memory_type_t', 'amdsmi_mm_ip_t', 'amdsmi_name_value_t', 'amdsmi_node_handle', 'amdsmi_npm_info_t', 'amdsmi_npm_status_t', 'amdsmi_nps_caps_t', @@ -3575,9 +3684,10 @@ __all__ = \ 'amdsmi_processor_handle', 'amdsmi_ptl_data_format_t', 'amdsmi_range_t', 'amdsmi_ras_err_state_t', 'amdsmi_ras_feature_t', 'amdsmi_reg_type_t', 'amdsmi_reset_gpu', - 'amdsmi_reset_gpu_fan', 'amdsmi_reset_gpu_xgmi_error', - 'amdsmi_retired_page_record_t', 'amdsmi_set_clk_freq', - 'amdsmi_set_cpu_core_boostlimit', + 'amdsmi_reset_gpu_compute_partition', 'amdsmi_reset_gpu_fan', + 'amdsmi_reset_gpu_memory_partition', + 'amdsmi_reset_gpu_xgmi_error', 'amdsmi_retired_page_record_t', + 'amdsmi_set_clk_freq', 'amdsmi_set_cpu_core_boostlimit', 'amdsmi_set_cpu_df_pstate_range', 'amdsmi_set_cpu_gmi3_link_width_range', 'amdsmi_set_cpu_pcie_link_rate', @@ -3632,6 +3742,8 @@ __all__ = \ 'struct_amdsmi_freq_volt_region_t', 
'struct_amdsmi_frequencies_t', 'struct_amdsmi_frequency_range_t', 'struct_amdsmi_fw_info_t', 'struct_amdsmi_gpu_cache_info_t', 'struct_amdsmi_gpu_metrics_t', + 'struct_amdsmi_gpu_ras_policy_info_t', + 'struct_amdsmi_gpu_ras_policy_v4_0_t', 'struct_amdsmi_gpu_xcp_metrics_t', 'struct_amdsmi_hsmp_driver_version_t', 'struct_amdsmi_hsmp_metrics_table_t', 'struct_amdsmi_kfd_info_t', @@ -3657,8 +3769,9 @@ __all__ = \ 'struct_amdsmi_xgmi_link_status_t', 'struct_bdf_', 'struct_cache_', 'struct_engine_usage_', 'struct_fw_info_list_', 'struct_memory_usage_', 'struct_nps_flags_', 'struct_numa_range_', - 'struct_pcie_metric_', 'struct_pcie_static_', - 'struct_amdsmi_bdf_t', 'struct_valid_bits_', 'uint32_t', - 'uint64_t', 'uint8_t', 'union_amdsmi_bdf_t', - 'union_amdsmi_cper_valid_bits_t', 'union_amdsmi_nps_caps_t'] + 'struct_pcie_metric_', 'struct_pcie_static_', 'struct_ras_info_', + 'struct_amdsmi_bdf_t', + 'struct_valid_bits_', 'uint32_t', 'uint64_t', 'uint8_t', + 'union_amdsmi_bdf_t', 'union_amdsmi_cper_valid_bits_t', + 'union_amdsmi_nps_caps_t', 'union_policy_data_']